Пример #1
0
def test_g646_accuracy():
    """Check contact, experience and education parsing for the GET_646 fixture."""

    def pick(records, field, wanted):
        # First record whose ``field`` equals ``wanted``; None when absent.
        for record in records:
            if record[field] == wanted:
                return record
        return None

    # Contact parsing.
    contact_nodes = bs4(GET_646, 'lxml').findAll('contact')
    first, last = parse_candidate_name(contact_nodes)
    phones = parse_candidate_phones(contact_nodes)
    addresses = parse_candidate_addresses(contact_nodes)
    assert first == 'Patrick'
    assert last == 'Kaldawy'
    assert GET_646_ADDRESS in addresses
    for number, label in (
            (u'+18583531111', 'Home'),
            (u'+18583532222', 'Mobile'),
            (u'+18583535555', 'Work'),
            (u'+18583533333', 'Work'),
            (u'+18583534444', 'Home Fax'),
            (u'+96170345340', 'Mobile'),
    ):
        assert {'value': number, 'label': label} in phones

    # Experience parsing: every experience must carry exactly one bullet.
    experience_nodes = bs4(GET_646, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(experience_nodes)
    for experience in experiences:
        assert len(experience['bullets']) == 1
    # Known gaps, deliberately not asserted:
    #   * u'Technical Difference' -- the name is currently not grabbed.
    #   * u'Avalon Digital Marketing Systems, Inc (European Division' -- the
    #     org name comes back without the division in parentheses.
    convergence = pick(experiences, "organization", u'Convergence Inc. Llc')
    avalon = pick(experiences, "organization",
                  u'Avalon Digital Marketing Systems, Inc')
    assert None not in [convergence, avalon]
    for key, value in (('start_month', 3), ('start_year', 2004),
                       ('end_month', 9), ('end_year', 2004)):
        assert convergence[key] == value
    assert avalon['start_year'] == 2002
    assert avalon['end_year'] == 2003

    # Educations. (u'California State University, Chico' is not asserted.)
    education_nodes = bs4(GET_646, 'lxml').findAll('education')
    educations = parse_candidate_educations(education_nodes)
    butte = pick(educations, "school_name", u'Butte College')
    assert butte
    expected_degree = {
        'title': u'A.A',
        'type': 'Associate of Arts',
        'start_month': 1,
        'start_year': 1995,
        'end_month': 1,
        'end_year': 1996,
        'gpa_num': None,
        'bullets': [{'major': None, 'comments': None}],
    }
    assert expected_degree in butte['degrees']
Пример #2
0
def test_g626b_accuracy():
    """Check contact, experience and education parsing for the GET_626b fixture."""
    # Contact parsing.
    contact_nodes = bs4(GET_626b, 'lxml').findAll('contact')
    first, last = parse_candidate_name(contact_nodes)
    # Phones are parsed but not asserted: the check for u'503.493.1548'
    # is disabled.
    phones = parse_candidate_phones(contact_nodes)
    addresses = parse_candidate_addresses(contact_nodes)
    assert first == 'Kate'
    assert last == 'Begonia'
    assert GET_626b_ADDRESS in addresses

    # Experience parsing: only the one-bullet-per-experience rule is checked.
    experience_nodes = bs4(GET_626b, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(experience_nodes)
    for experience in experiences:
        assert len(experience['bullets']) == 1
    # Known gaps, deliberately not asserted:
    #   * Positions are not parsed after the comma, e.g. for
    #     u'Sage Software': u'Director, Digital Marketing Communications',
    #     u'Director, Creative Services'.
    #   * A self-employed consulting job whose organization is u'None'.
    #   * u'Oracle Corporation': u'Senior Director, Branding',
    #     u'Senior Director, Global Advertising',
    #     u'Director, Global Advertising and Direct Marketing Programs',
    #     u'Senior Advertising Manager, Domestic Advertising',
    #     u'Marketing Manager, Industry Solutions Marketing'.

    # Educations.
    education_nodes = bs4(GET_626b, 'lxml').findAll('education')
    educations = parse_candidate_educations(education_nodes)
    sfsu = None
    for school in educations:
        if school["school_name"] == u'San Francisco State University':
            sfsu = school
            break
    assert sfsu
    expected_degree = {
        'title': u'Bachelor of Arts',
        'type': "Bachelor's",
        'start_month': None,
        'start_year': None,
        'end_month': None,
        'end_year': None,
        'gpa_num': None,
        'bullets': [{'major': u'Humanities', 'comments': None}],
    }
    assert expected_degree in sfsu['degrees']
Пример #3
0
def test_parsing_edu_combinations():
    """Validate educations parsed from every ``edu_combinations`` attribute against EDU_SCHEMA."""
    for attr_name in dir(edu_combinations):
        # Skip any attribute whose name contains a double underscore.
        if "__" in attr_name:
            continue
        raw_xml = getattr(edu_combinations, attr_name)
        education_nodes = bs4(raw_xml, 'lxml').findAll('education')
        for parsed in parse_candidate_educations(education_nodes):
            outcome = validate(
                parsed, EDU_SCHEMA, format_checker=FormatChecker())
            assert outcome is None
Пример #4
0
def test_education_parsing():
    """
        Run the education parser over every stored entry in XML_MAPS
        (no API calls are made here).
        1. The number of parsed educations matches the entry's 'education_len'.
        2. Every parsed education validates against EDU_SCHEMA.
    """
    for fixture in XML_MAPS:
        education_nodes = bs4(fixture['tree_name'], 'lxml').findAll('education')
        parsed_educations = parse_candidate_educations(education_nodes)
        # Verify the expected length.
        assert len(parsed_educations) == fixture['education_len']
        # Verify each dict in the list conforms to the schema.
        for parsed in parsed_educations:
            outcome = validate(
                parsed, EDU_SCHEMA, format_checker=FormatChecker())
            assert outcome is None
Пример #5
0
def test_pdf13_accuracy():
    """Check contact, experience and education parsing for the PDF_13 fixture."""

    def check_fields(record, pairs):
        # The record must be truthy and match every (key, value) pair in order.
        assert record
        for key, value in pairs:
            assert record[key] == value

    # Contact parsing.
    contact_nodes = bs4(PDF_13, 'lxml').findAll('contact')
    first, last = parse_candidate_name(contact_nodes)
    phones = parse_candidate_phones(contact_nodes)
    assert first == 'Bruce'
    assert last == 'Parkey'
    assert {'value': u'+16309302756', 'label': 'Other'} in phones

    # Experience parsing: every experience must carry exactly one bullet.
    experience_nodes = bs4(PDF_13, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(experience_nodes)
    for experience in experiences:
        assert len(experience['bullets']) == 1

    def by_org(organization):
        # First experience for the organization, or None.
        for entry in experiences:
            if entry["organization"] == organization:
                return entry
        return None

    # Position (u'Owner and Senior iOS Contract Developer') is not asserted.
    check_fields(by_org(u'Sagamore Apps, Inc'), [
        ('start_month', 1),
        ('start_year', 2008),
        ('city', u'Darien'),
        ('state', u'IL'),
    ])

    # This entry is matched on organization and position together.
    rapid = None
    for entry in experiences:
        if (entry["organization"] == u'Rapid Solutions Group'
                and entry['position'] ==
                u'Vice President and Chief Information Officer'):
            rapid = entry
            break
    check_fields(rapid, [
        ('start_month', 1),
        ('start_year', 2003),
        ('end_month', 1),
        ('end_year', 2007),
        ('city', u'Mt. Prospect'),
        ('state', u'IL'),
    ])

    # Position (u'Vice President Information Technology') is not asserted.
    check_fields(by_org(u'Ams Direct, Inc'), [
        ('start_month', 1),
        ('start_year', 1999),
        ('end_month', 1),
        ('end_year', 2003),
        ('city', u'Burr Ridge'),
        ('state', u'IL'),
    ])

    # Educations.
    education_nodes = bs4(PDF_13, 'lxml').findAll('education')
    educations = parse_candidate_educations(education_nodes)
    purdue = None
    for school in educations:
        if school["school_name"] == u'Purdue University':
            purdue = school
            break
    assert purdue
    expected_degree = {
        'title': u'Bachelor of Science',
        'type': "Bachelor's",
        'start_month': None,
        'start_year': None,
        'end_month': None,
        'end_year': None,
        'gpa_num': None,
        'bullets': [{'major': u'Information Systems', 'comments': None}],
    }
    assert expected_degree in purdue['degrees']
Пример #6
0
def test_pdf_accuracy():
    """Check contact, experience and education parsing for the PDF fixture."""
    # Contact parsing.
    contact_nodes = bs4(PDF, 'lxml').findAll('contact')
    first, last = parse_candidate_name(contact_nodes)
    phones = parse_candidate_phones(contact_nodes)
    addresses = parse_candidate_addresses(contact_nodes)
    assert first == 'Mark'
    assert last == 'Greene'
    assert PDF_ADDRESS in addresses
    assert {'value': u'+17275651234', 'label': 'Other'} in phones

    # Experience parsing: every experience must carry exactly one bullet.
    experience_nodes = bs4(PDF, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(experience_nodes)
    for experience in experiences:
        assert len(experience['bullets']) == 1

    def locate(organization, position):
        # First experience matching both organization and position, or None.
        for entry in experiences:
            if (entry["organization"] == organization
                    and entry['position'] == position):
                return entry
        return None

    # Not asserted: u'SmartSource' / u'Technical Support',
    # u'Aerotek, Bank of America' / u'Mortgage Affiliate Services', and
    # u'JCIII & Associates' / u'Document Reviewer'.
    # TODO: look into raw (JCIII & Associates)
    #
    # Columns: organization, position, start_month, start_year,
    #          end_month, end_year, city, state.
    expected_jobs = [
        (u'CHASE', u'Sr. Loan Processor',
         5, 2012, 10, 2012, u'Tampa', u'FL'),
        (u'CHASE', u'Business Analyst/Loss Mitigation Specialist',
         7, 2010, 5, 2012, u'Tampa', u'FL'),
        (u'Computer Generated Solutions', u'Team Lead',
         12, 2007, 12, 2008, u'Tampa', u'FL'),
        (u'Computer Generated Solutions', u'Desktop Support Agent',
         9, 2006, 2, 2007, u'Tampa', u'FL'),
        (u'Advanced System Design', u'Software Analyst',
         10, 2005, 5, 2006, u'Tallahassee', u'FL'),
        (u'Bmc Solutions', u'Desktop Deployment Technician',
         7, 2005, 8, 2005, u'Tallahassee', u'FL'),
    ]
    for (organization, position, start_month, start_year, end_month,
         end_year, city, state) in expected_jobs:
        job = locate(organization, position)
        assert job
        assert job['start_month'] == start_month
        assert job['start_year'] == start_year
        assert job['end_month'] == end_month
        assert job['end_year'] == end_year
        assert job['city'] == city
        assert job['state'] == state

    # Educations.
    education_nodes = bs4(PDF, 'lxml').findAll('education')
    educations = parse_candidate_educations(education_nodes)
    itt = None
    for school in educations:
        if school["school_name"] == u'ITT Technical Institute':
            itt = school
            break
    assert itt
    expected_degree = {
        'title': u'Bachelor of Science Degree',
        'type': "Bachelor's",
        'start_month': None,
        'start_year': None,
        'end_month': 1,
        'end_year': 2013,
        'gpa_num': None,
        'bullets': [{
            'major': u'Information Systems/Cyber Securities',
            'comments': None
        }],
    }
    assert expected_degree in itt['degrees']
Пример #7
0
def test_g626a_accuracy():
    """Check contact, experience and education parsing for the GET_626a fixture."""

    def first_with(records, field, wanted):
        # First record whose ``field`` equals ``wanted``; None when absent.
        for record in records:
            if record[field] == wanted:
                return record
        return None

    def dates(start_month, start_year, end_month, end_year):
        # Expected (key, value) pairs for a full start/end date range.
        return [('start_month', start_month), ('start_year', start_year),
                ('end_month', end_month), ('end_year', end_year)]

    # Contact parsing.
    contact_nodes = bs4(GET_626a, 'lxml').findAll('contact')
    # The name is parsed but not asserted: the checks for
    # 'Yetunde' / 'Laniran' are disabled.
    parsed_name = parse_candidate_name(contact_nodes)
    phones = parse_candidate_phones(contact_nodes)
    addresses = parse_candidate_addresses(contact_nodes)
    assert GET_626a_ADDRESS in addresses
    assert {'value': u'+15033330350', 'label': 'Other'} in phones

    # Experience parsing: every experience must carry exactly one bullet.
    experience_nodes = bs4(GET_626a, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(experience_nodes)
    for experience in experiences:
        assert len(experience['bullets']) == 1

    portland = [('city', u'Portland'), ('state', u'OR')]
    # Not asserted: u'Via Training' and u'Oregon Graduate Institute', plus
    # the positions of the three university entries
    # (u'Instructor- Linguistics', u'Instructor - Language and Culture',
    # u'Research Associcate - Linguistics').
    expected_experiences = [
        (u'Census Bureau',
         [('start_month', 9), ('start_year', 2009),
          ('city', u'Bothell'), ('state', u'WA'),
          ('position', u'Partnership Specialist')]),
        (u'Nw Facility Maintenance',
         [('position', u'Network Admin/Project Coordinator')]
         + dates(1, 2007, 9, 2009) + portland),
        (u'Oregon Catholic Press',
         [('position', u'Project Manager/ Computer Support Specialist')]
         + dates(9, 2001, 12, 2006) + portland),
        (u'Teksystems',
         [('position', u'Computer Support Specialist')]
         + dates(7, 2000, 7, 2001) + portland),
        (u'Manpower',
         [('position', u'Computer Support Specialist')]
         + dates(9, 1999, 7, 2000) + portland),
        (u'Portland Youth Builders',
         [('position', u'Computer Instructor/Support Specialist')]
         + dates(4, 1999, 9, 1999) + portland),
        (u'University Of Oregon', dates(12, 1998, 3, 1999)),
        (u'Portland State University', dates(7, 1998, 8, 1998)),
        (u'University Of North Carolina', dates(1, 1995, 6, 1997)),
    ]
    for organization, checks in expected_experiences:
        job = first_with(experiences, 'organization', organization)
        assert job
        for key, value in checks:
            assert job[key] == value

    # Educations. Every expected degree has only an end date (month 1).
    education_nodes = bs4(GET_626a, 'lxml').findAll('education')
    educations = parse_candidate_educations(education_nodes)
    # Columns: school_name, major, title, end_year, type.
    expected_degrees = [
        (u'University of Phoenix', u'Information Systems/Management',
         u'Masters', 2007, "Master's"),
        (u'Heald College', u'Computer Technology',
         u'Diploma', 1999, 'Diploma'),
        (u'Cornell University', u'Linguistics',
         u'Master of Arts Degree', 1988, "Master's"),
        (u'University of Ibadan', u'Linguistics',
         u'Bachelor of Arts Degree', 1979, "Bachelor's"),
    ]
    for school_name, major, title, end_year, degree_type in expected_degrees:
        school = first_with(educations, "school_name", school_name)
        assert school
        expected_degree = {
            'title': title,
            'type': degree_type,
            'start_month': None,
            'start_year': None,
            'end_month': 1,
            'end_year': end_year,
            'gpa_num': None,
            'bullets': [{'major': major, 'comments': None}],
        }
        assert expected_degree in school['degrees']
Пример #8
0
def test_g642_accuracy():
    """Check contact, experience and education parsing for the GET_642 fixture."""
    # Contact parsing.
    contact_nodes = bs4(GET_642, 'lxml').findAll('contact')
    first, last = parse_candidate_name(contact_nodes)
    phones = parse_candidate_phones(contact_nodes)
    addresses = parse_candidate_addresses(contact_nodes)
    assert first == u'Bobby'
    assert last == u'Breland'
    assert {'value': u'+15137595877', 'label': 'Home'} in phones
    assert {'value': u'+15134773784', 'label': 'Mobile'} in phones
    assert GET_642_ADDRESS in addresses

    # Experience parsing: every experience must carry exactly one bullet.
    experience_nodes = bs4(GET_642, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(experience_nodes)
    for experience in experiences:
        assert len(experience['bullets']) == 1

    def by_org(organization):
        # First experience for the organization, or None.
        for entry in experiences:
            if entry["organization"] == organization:
                return entry
        return None

    # Columns: organization, (start_month, start_year),
    #          (end_month, end_year) or None when no end date is asserted,
    #          city.
    # Parser quirks reflected below: u'Liberty Township' is asserted as
    # u'Liberty', and u'Origin Technology in Business' is looked up as
    # u'Origin Technology'.
    expected_jobs = [
        (u'Pivotalthought Llc', (1, 2010), None, u'Liberty'),
        (u'Gxs, Inc', (8, 2008), (1, 2010), u'Gaithersburg'),
        (u'Sun Microsystems', (3, 2006), (7, 2008), u'Santa Clara'),
        (u'First Consulting Group', (11, 2005), (3, 2006), u'Long Beach'),
        (u'Computer Sciences Corporation Consulting Group',
         (2, 2003), (11, 2005), u'Waltham'),
        (u'Seebeyond Technology Corporation',
         (3, 2001), (2, 2003), u'Monrovia'),
        (u'Collaborex, Inc', (4, 2000), (3, 2001), u'Fairfax'),
        (u'Origin Technology', (2, 1998), (4, 2000), u'Cincinnati'),
        (u'R.w. Johnson Pri', (5, 1995), (2, 1998), u'Raritan'),
        (u'Taratec Development Corporation',
         (12, 1991), (5, 1995), u'Bridgewater'),
        (u'H. B. Zachry', (4, 1991), (12, 1991), u'San Antonio'),
        (u'Triple I', (6, 1989), (4, 1991), u'Deepwater'),
    ]
    # Resolve every organization first, then require that all were found.
    located = [by_org(row[0]) for row in expected_jobs]
    assert None not in located
    for job, (_, start, end, city) in zip(located, expected_jobs):
        assert job['start_month'] == start[0]
        assert job['start_year'] == start[1]
        if end is not None:
            assert job['end_month'] == end[0]
            assert job['end_year'] == end[1]
        assert job['city'] == city

    # Educations.
    education_nodes = bs4(GET_642, 'lxml').findAll('education')
    educations = parse_candidate_educations(education_nodes)
    nlu = None
    for school in educations:
        if school["school_name"] == u'Northeast Louisiana University':
            nlu = school
            break
    assert nlu
    expected_degree = {
        'title': u'B.S',
        'type': 'Bachelor of Science',
        'start_month': None,
        'start_year': None,
        'end_month': 12,
        'end_year': 1988,
        'gpa_num': None,
        'bullets': [{'major': u'Computer Science', 'comments': None}],
    }
    assert expected_degree in nlu['degrees']
Пример #9
0
def test_docx_accuracy():
    """Check contact, experience and education parsing for the DOCX fixture."""
    # Contact parsing.
    contact_nodes = bs4(DOCX, 'lxml').findAll('contact')
    first, last = parse_candidate_name(contact_nodes)
    phones = parse_candidate_phones(contact_nodes)
    addresses = parse_candidate_addresses(contact_nodes)
    assert DOCX_ADDRESS in addresses
    assert first == 'Veena'
    assert last == 'Nithoo'
    assert {'value': u'+12154120817', 'label': 'Other'} in phones

    # Experience parsing: every experience must carry exactly one bullet.
    experience_nodes = bs4(DOCX, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(experience_nodes)
    for experience in experiences:
        assert len(experience['bullets']) == 1

    def by_org(organization):
        # First experience for the organization, or None.
        for entry in experiences:
            if entry["organization"] == organization:
                return entry
        return None

    # Columns: organization, (start_month, start_year),
    #          (end_month, end_year) or None when no end date is asserted,
    #          position.
    expected_jobs = [
        (u'Merck & Co, Inc', (10, 2000), None,
         u'Application Services Analyst'),
        (u'Infomc Inc', (9, 1999), (6, 2000), u'Analyst Programmer'),
        (u'Datakinetics Inc', (1, 1998), (8, 1999), u'Analyst Programmer'),
        (u'Harel Mallac, Mcs Development Ltd', (3, 1996), (5, 1997),
         u'Analyst Programmer'),
        (u'Gt Management Ltd', (9, 1993), (9, 1994), u'Analyst Programmer'),
    ]
    # Resolve every organization first, then require that all were found.
    located = [by_org(row[0]) for row in expected_jobs]
    assert None not in located
    for job, (_, start, end, position) in zip(located, expected_jobs):
        assert job['start_month'] == start[0]
        assert job['start_year'] == start[1]
        if end is not None:
            assert job['end_month'] == end[0]
            assert job['end_year'] == end[1]
        assert job['position'] == position

    # Education parsing.
    education_nodes = bs4(DOCX, 'lxml').findAll('education')
    educations = parse_candidate_educations(education_nodes)
    south_bank = None
    for school in educations:
        if school["school_name"] == u'South Bank University':
            south_bank = school
            break
    assert south_bank
    assert south_bank['city'] == u'London'
    expected_degree = {
        'title': u'B.Sc',
        'type': 'Bachelor of Science',
        'start_month': None,
        'start_year': None,
        'end_month': 7,
        'end_year': 1995,
        'gpa_num': None,
        'bullets': [{'major': u'Computing Studies', 'comments': None}],
    }
    assert expected_degree in south_bank['degrees']
Пример #10
0
def test_pdf14_accuracy():
    """Check name and education parsing for the PDF_14 fixture.

    Phones, addresses and experiences are parsed as well, but apart from the
    one-bullet-per-experience rule their contents are not asserted.
    """
    # Contact Parsing.
    contact_xml_list = bs4(PDF_14, 'lxml').findAll('contact')
    first, last = parse_candidate_name(contact_xml_list)
    # Parsed without content assertions; the phone check for u'604.609.0921'
    # is disabled.
    phones = parse_candidate_phones(contact_xml_list)
    addresses = parse_candidate_addresses(contact_xml_list)
    # Experience parsing: every experience must carry exactly one bullet.
    experience_xml_list = bs4(PDF_14, 'lxml').findAll('experience')
    experiences = parse_candidate_experiences(experience_xml_list)
    for exp in experiences:
        assert len(exp['bullets']) == 1
    educations_xml_list = bs4(PDF_14, 'lxml').findAll('education')
    educations = parse_candidate_educations(educations_xml_list)
    assert first == 'Jose'
    assert last == 'Chavez'
    # Experience lookups not asserted here (organization / position):
    #   * u'Organization Committee Commonwelath Games 2010' /
    #     u'Games Management Systems Director'
    #   * u'Atos Origin Canada' / u'Core Games Systems Application Manager'
    #   * u'Design Maintenance Systems Inc.' /
    #     u'Software Testing Engineer/ Jr. Developer'
    #   * u'Orbital Technologies Inc.' / u'Software Testing Engineer'
    edu1 = next(
        (edu
         for edu in educations if edu["school_name"] == u'ITESO University'),
        None)
    assert edu1
    assert {
        'start_month': None,
        'end_month': 1,
        'start_year': None,
        'bullets': [{
            'major': u'Computer Systems Engineer',
            'comments': None
        }],
        'title': u'B. Sc',
        'gpa_num': None,
        'end_year': 1992,
        'type': 'Bachelor of Science'
    } in edu1['degrees']
    # u'ITESM University' is not asserted.
    edu3 = next(
        (edu for edu in educations
         if edu["school_name"] == u'British Columbia Institute of Technology'),
        None)
    # Guard the lookup as is done for edu1: without it a missing school
    # surfaces as a TypeError on None['degrees'] rather than a test failure.
    assert edu3
    assert {
        'start_month': None,
        'end_month': 1,
        'start_year': None,
        'bullets': [{
            'major': u'software Engineering',
            'comments': None
        }],
        'title': u'Software Engineering Diploma',
        'gpa_num': None,
        'end_year': 1993,
        'type': 'Diploma'
    } in edu3['degrees']