def test_resume_urls_and_their_categories(self):
    """Both resume fixtures yield exactly one URL, categorised under 'others'."""
    extracted = [
        get_urls(self.ideal_resume_data),
        get_urls(self.resume_without_name),
    ]

    # Each fixture must produce the single bare URL 'api.ai'.
    for found_urls in extracted:
        self.assertEqual(len(found_urls), 1)
        self.assertEqual(found_urls[0], 'api.ai')

    ideal_categories, nameless_categories = [
        url_categories(found_urls) for found_urls in extracted
    ]

    # After categorisation the URL is expected with an 'http://' scheme.
    for categorised in (nameless_categories, ideal_categories):
        self.assertEqual(categorised['others'][0], 'http://api.ai')
def test_website_score(self):
    """Link the fixture's URLs to a new resume and check the website scores.

    ``apply_website_score`` is replaced by a local mock returning
    ``(None, None, None)``, so the three score assertions check the mocked
    values only.
    """
    category_urls = url_categories(get_urls(self.test_website_score))
    # A check that category_urls['others'][0] == 'http://imgur.com/' was
    # disabled in the original test and stays disabled here.
    url_response = get_url_response(category_urls)
    resume = Resume.objects.create(parse_status=0)

    # Update the category of a known URL, otherwise create it; then link it.
    for entry in url_response:
        existing = Url.objects.filter(url=entry['name'])
        if not existing.exists():
            url_obj = Url.objects.create(url=entry['name'],
                                         category=entry['type'])
        else:
            url_obj = existing[0]
            url_obj.category = entry['type']
            url_obj.save()
        resume.urls.add(url_obj)

    basics_path = (settings.TESTDATA_DIRS +
                   'score_calculation/test_website_score.txt')
    _first_name, _last_name, _phone_number, email = get_basics(basics_path)
    self.assertEqual(email, '*****@*****.**')

    apply_website_score = mock.Mock(return_value=(None, None, None))
    activity, reputation, contribution = apply_website_score(
        category_urls, resume, email)

    for score in (activity, contribution, reputation):
        self.assertEqual(score, None)
def parse_resume_internal(path, text, resume_id, file_name, hash_value, post_data):
    """Fill an existing ``Resume`` row from posted data, falling back to parsed basics.

    Values in ``post_data`` take precedence. Any of first name, last name,
    phone number or email that is missing (falsy) there is taken from the
    ``'basics'`` section of ``extract_resume(path)``.

    Args:
        path: Resume file location; passed to ``extract_resume`` and stored
            as ``resume_location``.
        text: Resume text; used for URL extraction and stored as ``content``.
        resume_id: Primary key of the ``Resume`` row to update.
        file_name: Stored as ``file_name``.
        hash_value: Stored as ``content_hash``.
        post_data: Mapping read with ``.get`` for ``first_name``,
            ``last_name``, ``contact_no`` and ``email``.

    Returns:
        The saved ``Resume`` instance.
    """
    urls = get_urls(text)
    categories_url = url_categories(urls)
    url_response = get_url_response(categories_url)
    resume_details = Resume.objects.get(id=resume_id)
    response = extract_resume(path)

    # Give every parsed field a default so the fallbacks below never touch an
    # unbound name when the extractor returns no 'basics' section.
    first_name = last_name = phone_number = email = None
    basics = response.get('basics')
    if basics:
        first_name = basics.get('first_name')
        last_name = basics.get('last_name')
        phone_number = basics.get('phone')
        email = basics.get('email')
        if email:
            # Only the first entry of the parsed email value is kept.
            email = email[0]

    # NOTE(review): parse_resume() reuses an existing Url row before creating
    # one; this function always creates. Confirm duplicates are acceptable.
    for item in url_response:
        resume_url = Url.objects.create(category=item['type'], url=item['name'])
        resume_details.urls.add(resume_url)

    # Posted values first.
    resume_details.first_name = post_data.get('first_name')
    resume_details.last_name = post_data.get('last_name')
    resume_details.phone_number = post_data.get('contact_no')
    resume_details.email = post_data.get('email')

    # Parsed fallbacks. Names are truncated to 45 characters; a parsed value
    # of None is skipped because slicing it would raise TypeError.
    if not resume_details.first_name and first_name is not None:
        resume_details.first_name = first_name[:45]
    if not resume_details.last_name and last_name is not None:
        resume_details.last_name = last_name[:45]
    if not resume_details.phone_number:
        resume_details.phone_number = phone_number
    if not resume_details.email:
        resume_details.email = email

    resume_details.content = text
    resume_details.file_name = file_name
    resume_details.resume_location = path
    resume_details.content_hash = hash_value
    resume_details.parse_status = Resume.STATUS.processed
    resume_details.save()
    return resume_details
def test_bitbucket_score_calculation(self):
    """Link the fixture's URLs to a new resume and check the BitBucket scores.

    ``get_bitbucket_username`` and ``apply_bitbucket_score`` are replaced by
    local mocks, so the username and score assertions check mocked values.
    """
    urls = get_urls(self.test_bitbucket_score)
    category_urls = url_categories(urls)
    contribution_urls = category_urls['contributions']
    bitbucket_url = get_bit_bucket_url(contribution_urls)

    # The original carried a disabled workaround that kept only the part of
    # the username before the first backslash; the mock makes it unnecessary.
    get_bitbucket_username = mock.Mock(return_value='Anubhav_722')
    bitbucket_username = get_bitbucket_username(bitbucket_url)
    self.assertEqual(bitbucket_username, 'Anubhav_722')

    resume = Resume.objects.create(parse_status=0)
    url_response = get_url_response(category_urls)
    # Update the category of a known URL, otherwise create it; then link it.
    for item in url_response:
        resume_urls = Url.objects.filter(url=item['name'])
        if resume_urls.exists():
            resume_url = resume_urls[0]
            resume_url.category = item['type']
            resume_url.save()
        else:
            resume_url = Url.objects.create(url=item['name'],
                                            category=item['type'])
        resume.urls.add(resume_url)

    # The result is unpacked as (activity, reputation, contribution), the same
    # order parse_resume() uses, so the mocked tuple must follow that order.
    # The original returned (.2, .05, 0.0), which swapped reputation and
    # contribution and made the last two assertions fail.
    #   user_type = 1        -> activity_score = 0.2
    #   no. of followers = 0 -> reputation_score = 0.0
    #   no. of repos = 2     -> contribution_score = .05
    apply_bitbucket_score = mock.Mock(return_value=(.2, 0.0, .05))
    activity_score, reputation_score, contribution_score = apply_bitbucket_score(
        bitbucket_username, resume)

    self.assertEqual(activity_score, .2)
    self.assertEqual(contribution_score, .05)
    self.assertEqual(reputation_score, 0.0)
def test_blog_score(self):
    """Link the fixture's blog URL to a new resume and check the blog scores.

    ``apply_blog_score`` is the real function; ``calculate_blog_scores`` is
    replaced by a local mock returning ``(.4, 0, 0)``, so the final score
    assertions check the mocked values.
    """
    category_urls = url_categories(get_urls(self.test_blog_score))

    # Keep only the part of the first blog URL before the first backslash.
    blog_urls = category_urls['blog']
    blog_urls[0] = blog_urls[0].split('\\')[0]

    resume = Resume.objects.create(parse_status=0)
    url_response = get_url_response(category_urls)

    # Update the category of a known URL, otherwise create it; then link it.
    for entry in url_response:
        existing = Url.objects.filter(url=entry['name'])
        if not existing.exists():
            url_obj = Url.objects.create(url=entry['name'],
                                         category=entry['type'])
        else:
            url_obj = existing[0]
            url_obj.category = entry['type']
            url_obj.save()
        resume.urls.add(url_obj)

    first_linked_url = resume.urls.all()[0].url
    self.assertEqual(first_linked_url,
                     'https://www.tumblr.com/blog/i-psychoassassin')

    apply_blog_score(category_urls, resume)

    # Mocked averages: activity 0.4, reputation 0, contribution 0.
    calculate_blog_scores = mock.Mock(return_value=(.4, 0, 0))
    activity, reputation, contribution = calculate_blog_scores(resume)

    self.assertEqual(activity, .4)
    self.assertEqual(reputation, 0)
    self.assertEqual(contribution, 0)
def test_itunes_store_score(self):
    """Link the fixture's URLs to a new resume and check the mobile-app scores.

    ``apply_itunes_score`` is the real function;
    ``calculate_average_mobile_contrib_score`` is replaced by a local mock
    returning ``(.3, .7, .05)``, so the final score assertions check the
    mocked values.
    """
    urls = get_urls(self.test_itunes_score)
    category_urls = url_categories(urls)
    url_response = get_url_response(category_urls)
    self.assertIn(
        'https://itunes.apple.com/in/app/whatsapp-messenger/id310633997?mt=8',
        category_urls['apps'])

    # Elsewhere in this file get_basics() is unpacked as (first_name,
    # last_name, phone_number, email). The original named the last two the
    # other way round here. Only first_name is used, so the rest are marked
    # as unused instead.
    first_name, _last_name, _phone_number, _email = get_basics(
        settings.TESTDATA_DIRS + 'score_calculation/test_itunes_score.txt')
    self.assertEqual(first_name, 'anubhav')

    resume = Resume.objects.create(parse_status=0)
    # Update the category of a known URL, otherwise create it; then link it.
    for item in url_response:
        resume_urls = Url.objects.filter(url=item['name'])
        if resume_urls.exists():
            resume_url = resume_urls[0]
            resume_url.category = item['type']
            resume_url.save()
        else:
            resume_url = Url.objects.create(url=item['name'],
                                            category=item['type'])
        resume.urls.add(resume_url)

    calculate_average_mobile_contrib_score = mock.Mock(
        return_value=(.3, .7, .05))
    apply_itunes_score(category_urls, first_name, resume)

    # Unpacked as (activity, reputation, contribution).
    (avg_mobile_apps_activity_score,
     avg_mobile_apps_reputation_score,
     avg_mobile_apps_contribution_score
     ) = calculate_average_mobile_contrib_score(resume)

    self.assertEqual(avg_mobile_apps_contribution_score, .05)
    self.assertEqual(avg_mobile_apps_reputation_score, .7)
    self.assertEqual(avg_mobile_apps_activity_score, .3)
def test_github_score_calculation(self):
    """Link the fixture's URLs to a new resume and check the GitHub scores.

    ``get_github_username`` and ``apply_github_score`` are replaced by local
    mocks, so the username and score assertions check mocked values.
    """
    get_github_username = mock.Mock(return_value='Anubhav722')
    github_username = get_github_username(self.test_github_score)

    resume = Resume.objects.create(parse_status=0)
    category_urls = url_categories(get_urls(self.test_github_score))
    url_response = get_url_response(category_urls)

    # Update the category of a known URL, otherwise create it; then link it.
    for entry in url_response:
        existing = Url.objects.filter(url=entry['name'])
        if not existing.exists():
            url_obj = Url.objects.create(url=entry['name'],
                                         category=entry['type'])
        else:
            url_obj = existing[0]
            url_obj.category = entry['type']
            url_obj.save()
        resume.urls.add(url_obj)

    # The original noted that keeping only the text before the first
    # backslash belongs in get_github_username() in
    # resume/utils/parser_helper.py; that workaround is disabled here.
    self.assertEqual(github_username, 'Anubhav722')

    # Mocked result, unpacked as (activity, reputation, contribution).
    # NOTE(review): the original comments gave activity_score = .2 for
    # user_type = 2, but the mocked activity value is .1 -- confirm.
    apply_github_score = mock.Mock(return_value=(.1, .01, .6))
    activity, reputation, contribution = apply_github_score(
        github_username, resume)

    self.assertEqual(activity, .1)
    self.assertEqual(reputation, .01)
    self.assertEqual(contribution, .6)
def test_play_store_score(self):
    """Link the fixture's URLs to a new resume and check the mobile-app scores.

    ``apply_play_store_app_score`` is the real function;
    ``calculate_average_mobile_contrib_score`` is replaced by a local mock
    returning ``(.25, .44, .2)``, so the final score assertions check the
    mocked values.
    """
    category_urls = url_categories(get_urls(self.test_play_store_score))

    # Keep only the part of the first app URL before the first backslash.
    app_urls = category_urls['apps']
    app_urls[0] = app_urls[0].split('\\')[0]

    url_response = get_url_response(category_urls)
    resume = Resume.objects.create(parse_status=0)

    # Update the category of a known URL, otherwise create it; then link it.
    for entry in url_response:
        existing = Url.objects.filter(url=entry['name'])
        if not existing.exists():
            url_obj = Url.objects.create(url=entry['name'],
                                         category=entry['type'])
        else:
            url_obj = existing[0]
            url_obj.category = entry['type']
            url_obj.save()
        resume.urls.add(url_obj)

    apply_play_store_app_score(category_urls, resume)

    # Mocked averages, unpacked as (activity, reputation, contribution).
    # NOTE(review): the original comments quoted reputation .44 (rating 4.4),
    # contribution .25 (5K-10K downloads) and activity .3 (updated 2 April
    # 2017); only the reputation figure matches the mocked tuple -- confirm.
    calculate_average_mobile_contrib_score = mock.Mock(
        return_value=(.25, .44, .2))
    activity, reputation, contribution = (
        calculate_average_mobile_contrib_score(resume))

    self.assertEqual(reputation, 0.44)
    self.assertEqual(activity, .25)
    self.assertEqual(contribution, .2)
def test_get_urls(self):
    """``get_urls`` applied to ``self.text`` must equal the expected ``urls``."""
    extracted_urls = get_urls(self.text)
    self.assertEqual(extracted_urls, urls)
def benchmark(quick_mode=False):
    """Categorise the URLs of every resume and append one CSV row per resume.

    For each entry of the module-level ``resume_list`` the resume is run
    through ``extract_resume``, its text is fetched with ``get_text``, the
    URLs are categorised with ``url_categorizer`` and one row is appended to
    ``output.csv``. Resumes with no text are skipped.

    Args:
        quick_mode: When true, only the first five resumes are processed.

    Returns:
        ``json.dumps`` of the local ``user`` dict. Nothing populates it, so
        the result is always ``"{}"``.
    """
    global resume_list
    user = {}
    if quick_mode:
        # NOTE: this rebinds the module-level list, as the original did, so
        # the list stays truncated for later callers.
        resume_list = resume_list[:5]

    for resume in resume_list:
        # The return value was only read by code that is now disabled; the
        # call itself is kept because it is part of the benchmark run.
        extract_resume(resume)

        text = get_text(resume)
        if text is None:
            # Nothing to categorise. Skipping avoids writing a row built from
            # unbound or stale values.
            continue

        categories = url_categorizer(get_urls(text), text)
        websites = categories['Websites']
        personal_urls = websites['Personal Urls']
        social_sites = categories['Social Websites']

        # A single space is the placeholder for a category that was not found.
        _append_benchmark_row({
            'file_name': resume,
            'Personal Website': personal_urls.get('Personal Website', ' '),
            'Blog': websites.get('Blog', ' '),
            'GitHub': social_sites.get('GitHub Url', ' '),
            'LinkedIn': social_sites.get('LinkedIn Url', ' '),
            'StackOverflow': social_sites.get('StackOverflow Url', ' '),
            'BitBucket': social_sites.get('BitBucket Url', ' '),
            'GitHub_Gist': social_sites.get('GitHub Gist Url', ' '),
            'Other urls': personal_urls.get('Other Urls', ' '),
        })

    return json.dumps(user)


def _append_benchmark_row(row, csv_path='output.csv'):
    """Append *row* to *csv_path*, writing the header only if the file is empty."""
    field_names = [
        'file_name', 'Personal Website', 'Blog', 'GitHub', 'LinkedIn',
        'StackOverflow', 'BitBucket', 'GitHub_Gist', 'Other urls'
    ]
    # newline='' is what the csv module documents for files it writes to.
    with open(csv_path, 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=field_names)
        # In append mode the position starts at the end of the file, so 0
        # means the file is new or empty. The original wrote a header before
        # every row.
        if csvfile.tell() == 0:
            writer.writeheader()
        writer.writerow(row)
def parse_resume(path, text, resume_id, skills, file_name, hash_value,
                 callback_client=False):
    """Parse a resume, score it and persist the results on its ``Resume`` row.

    In order: link the URLs found in ``text`` to the resume, compute the
    skill-matching score, apply the per-source scores (StackOverflow, GitHub,
    blog, BitBucket, Play Store, iTunes, website), store three ``Score`` rows
    (coding, social, skill matching), attach locations, companies,
    institutions and skills, save the resume, and optionally call back the
    internal client.

    Args:
        path: Resume file location; passed to ``get_basics`` and stored as
            ``resume_location``.
        text: Resume text used for URL, score and feature extraction.
        resume_id: Primary key of the ``Resume`` row to update.
        skills: Passed to ``get_skill_matching_score`` together with ``text``.
        file_name: Passed through ``check_file_name_length`` before storing.
        hash_value: Stored as ``content_hash``.
        callback_client: When true, ``callback_internal_client`` is called
            after the resume is saved.

    Returns:
        The string ``"Resume Processed"``.
    """
    import re  # function-local, as in the original

    content_list = text.lower().split()

    # Get resume instance.
    resume_details = Resume.objects.get(id=resume_id)

    # Categorise the URLs and build the custom URL response.
    categories_url = url_categories(get_urls(text))
    url_response = get_url_response(categories_url)

    # Basic details: values already stored on the resume win over parsed ones.
    first_name, last_name, phone_number, email = get_basics(path)
    email = resume_details.email or email
    first_name = resume_details.first_name or first_name
    last_name = resume_details.last_name or last_name
    phone_number = resume_details.phone_number or phone_number

    # Update the category of a known URL, otherwise create it; then link it.
    for item in url_response:
        resume_urls = Url.objects.filter(url=item['name'])
        if resume_urls.exists():
            resume_url = resume_urls[0]
            resume_url.category = item['type']
            resume_url.save()
        else:
            resume_url = Url.objects.create(url=item['name'],
                                            category=item['type'])
        resume_details.urls.add(resume_url)

    # Skills matching score.
    (skill_match_score, skills_matched,
     skills_not_matched) = get_skill_matching_score(skills, text)

    # Per-source scores stay 0 when the source is not found in the resume.
    github_activity_score = 0
    github_reputation_score = 0
    github_contribution_score = 0
    bit_bucket_activity_score = 0
    bit_bucket_reputation_score = 0
    bit_bucket_contribution_score = 0
    stackoverflow_activity_score = 0
    stackoverflow_reputation_score = 0
    stackoverflow_contribution_score = 0

    # StackOverflow score.
    stackoverflow_user_id = get_stackoverflow_userid(text)
    if stackoverflow_user_id:
        (stackoverflow_activity_score,
         stackoverflow_reputation_score,
         stackoverflow_contribution_score) = apply_stackoverflow_score(
             stackoverflow_user_id, resume_details)

    # GitHub score.
    github_username = get_github_username(text)
    if github_username:
        (github_activity_score,
         github_reputation_score,
         github_contribution_score) = apply_github_score(
             github_username, resume_details)

    # Blog score; the averages are read back further down.
    apply_blog_score(categories_url, resume_details)

    # BitBucket score.
    bit_bucket_url = get_bit_bucket_url(categories_url['contributions'])
    if bit_bucket_url is not None and bit_bucket_url != 'No Url Found':
        bit_bucket_user_name = get_bitbucket_username(bit_bucket_url)
        (bit_bucket_activity_score,
         bit_bucket_reputation_score,
         bit_bucket_contribution_score) = apply_bitbucket_score(
             bit_bucket_user_name, resume_details)

    # Mobile apps: Play Store, then iTunes.
    apply_play_store_app_score(categories_url, resume_details)
    apply_itunes_score(categories_url, first_name, resume_details)

    # Website score. The original bound the returned triple and then
    # overwrote it with the averages below without reading it, so only the
    # call is kept.
    apply_website_score(categories_url, resume_details, email)

    save_resume_skills(resume_details, skills_matched, skills_not_matched)

    # Work experience.
    features = FeatureExtraction()
    work_experience = features.get_work_experience(text)

    # Averages, each unpacked as (activity, reputation, contribution).
    (blog_activity_score, blog_reputation_score,
     blog_contribution_score) = calculate_blog_scores(resume_details)
    (website_activity_score, website_reputation_score,
     website_contribution_score) = calculate_website_scores(resume_details)
    (mobile_app_activity_score, mobile_app_reputation_score,
     mobile_app_contribution_score
     ) = calculate_average_mobile_contrib_score(resume_details)

    # The operand order of every sum below matches the original so that
    # floating-point results are unchanged.
    coding_total_contribution_score = (github_contribution_score +
                                       bit_bucket_contribution_score +
                                       stackoverflow_contribution_score +
                                       mobile_app_contribution_score)
    social_total_contribution_score = (blog_contribution_score +
                                       website_contribution_score)

    coding_total_activity_score = (github_activity_score +
                                   stackoverflow_activity_score +
                                   bit_bucket_activity_score +
                                   mobile_app_activity_score)
    social_total_activity_score = blog_activity_score + website_activity_score

    coding_total_reputation_score = (github_reputation_score +
                                     stackoverflow_reputation_score +
                                     bit_bucket_reputation_score +
                                     mobile_app_reputation_score)
    social_total_reputation_score = (blog_reputation_score +
                                     website_reputation_score)

    # Original comments give the weights as: coding 2.5 of 5, social 1.5 of 5,
    # skill matching 1 of 5.
    total_coding_score = (coding_total_contribution_score +
                          coding_total_reputation_score +
                          coding_total_activity_score)
    total_social_score = (social_total_contribution_score +
                          social_total_activity_score +
                          social_total_reputation_score)
    total_skill_score = skill_match_score

    # Save one Score row per aggregate and link it to the resume.
    aggregate_scores = (
        (Score.TYPES.coding, total_coding_score),
        (Score.TYPES.social, total_social_score),
        (Score.TYPES.skill_matching, total_skill_score),
    )
    for score_type, score_value in aggregate_scores:
        score_instance = Score.objects.create(type=score_type,
                                              score=score_value)
        resume_details.scores.add(score_instance)

    total_ranking = total_coding_score + total_social_score + total_skill_score

    # Extract location, company and institution names.
    extract_features = ExtractFeatures()
    locations = extract_features.get_location(text)
    companies = extract_features.get_company_names(text)
    institutions = extract_features.get_institution_names(text)

    for location in locations:
        location_instance, _created = Location.objects.get_or_create(
            name=location)
        resume_details.locations.add(location_instance)

    for company in companies:
        company_instance, _created = Company.objects.get_or_create(
            name=company)
        resume_details.companies.add(company_instance)

    for institution in institutions:
        institution_instance, _created = Institution.objects.get_or_create(
            name=institution)
        resume_details.institutions.add(institution_instance)

    # Extract skills from the provided text.
    # NOTE: As per now we're getting top 1000 tags from SO to extract skills.
    # With those skills we're getting intersection with list of content text.
    # Need to find better solution to do so [Bloom filter, et cetera]
    # Raw string: the original non-raw literal contained the invalid escape
    # '\.'. Inside a character class a bare '.' matches the same literal dot.
    non_skill_chars = re.compile(r'[^0-9a-zA-Z.]+')
    cleaned_tokens = (non_skill_chars.sub('', token) for token in content_list)
    matched_skills = list(skillset.intersection(cleaned_tokens))
    for skill in matched_skills:
        skill_instance, _created = Skill.objects.get_or_create(name=skill)
        rskills = ResumeSkills(resume=resume_details, skill=skill_instance)
        rskills.save()

    # Persist everything on the resume.
    file_name = check_file_name_length(file_name)
    resume_details.first_name = first_name
    resume_details.last_name = last_name
    resume_details.phone_number = phone_number
    resume_details.parse_status = Resume.STATUS.processed
    resume_details.file_name = file_name
    resume_details.content_hash = hash_value
    resume_details.content = text
    resume_details.email = email
    resume_details.resume_location = path
    resume_details.experience = work_experience
    resume_details.total_score = total_ranking
    resume_details.save()

    if callback_client:
        resp = callback_internal_client(resume_details)
        if resp.status_code != requests.codes.ok:
            # The original combined a '%s' placeholder with str.format(), so
            # the resume id never appeared in the message.
            print(
                "ERROR: Unable to callback to internal client for resume: {}".
                format(resume_id))

    return "Resume Processed"