def submit():
    """Build and return the d3 tree payload for a university/major pair.

    Reads optional "major" and "university" fields from the posted form.
    When the form supplies a university it overrides the module-level
    ``university`` remembered from the last resume upload; otherwise the
    global value is used.  The resulting tree is cached in the global
    ``tree_json`` and returned as a JSON string.
    """
    global university
    global tree_json

    # Default to an empty major: the original raised NameError when the
    # form carried a "university" field but no "major" field.
    major = ""
    if "major" in request.form:
        major = str(request.form["major"]).strip('"')

    if "university" in request.form:
        # Client-supplied university takes precedence over the global one.
        university_ip = str(request.form["university"]).strip('"')
        tree_json = create_data_for_tree(
            university_ip, major, skills_employer_tree,
            univ_major_number, major_code_lookup,
            employer_second_degree_tree)
    else:
        tree_json = create_data_for_tree(
            university, major, skills_employer_tree,
            univ_major_number, major_code_lookup,
            employer_second_degree_tree)
    return json.dumps(tree_json)
def submit():
    """Return the d3 tree JSON for the university/major posted in the form.

    Falls back to the module-level ``university`` when the form does not
    carry one; the built tree is stored in the global ``tree_json``.
    """
    global university
    global tree_json

    if "major" in request.form:
        major = str(request.form["major"]).strip('"')

    # Pick the university once, then build the tree with a single call.
    if "university" in request.form:
        selected_univ = str(request.form["university"]).strip('"')
    else:
        selected_univ = university

    tree_json = create_data_for_tree(
        selected_univ, major, skills_employer_tree, univ_major_number,
        major_code_lookup, employer_second_degree_tree)
    return json.dumps(tree_json)
def analyze():
    """Analyze an uploaded resume and return ranked job-title matches.

    Expects a multipart upload under the "file" key.  PDF uploads are
    converted to plain text first.  Detects the candidate's university,
    builds the d3 tree payload, scores the resume with the trained model,
    and blends that score with a keyword-based skill score into a final
    ranking.  Returns the full result set as a JSON string and removes the
    uploaded/intermediate files afterwards.
    """
    global university
    global tree_json

    # NOTE(review): request.method is always a non-empty string under
    # Flask, so this guard is effectively always true; kept for parity
    # with the original control flow.
    if request.method:
        files = request.files['file']
        if files:
            filename = str(files.filename)
            # Guard filenames without a dot: the original indexed
            # rsplit('.', 1)[1] and raised IndexError for them.
            if '.' in filename:
                filename_without_extension, extension = filename.rsplit('.', 1)
            else:
                filename_without_extension, extension = filename, ''
            files.save(os.path.join(iHire.config['UPLOAD_FOLDER'], filename))

            if extension == 'pdf':
                # Convert the PDF and normalise UTF-8 non-breaking spaces
                # (the byte pair '\xc2\xa0') to plain spaces.
                text_from_pdf = extract_text_from_pdf(filename)
                text_from_pdf = text_from_pdf.replace('\xc2\xa0', ' ')
                textfile_name = filename_without_extension + '.txt'
                with open(textfile_name, 'wb') as write_file:
                    write_file.write(text_from_pdf)
            else:
                textfile_name = filename

            # Read the resume text exactly once; the original opened the
            # file twice and leaked both handles.
            with open(textfile_name) as resume_file:
                resume_raw = resume_file.read()

            university = extract_univ(resume_raw, univ_dict, univ_normalize)
            print(filename)

            tree_json = create_data_for_tree(
                university, "", skills_employer_tree, univ_major_number,
                major_code_lookup, employer_second_degree_tree)

            resume_text = [resume_raw]
            predicted_decision = model.decision_function(resume_text)
            top_predictions, normalized_prediction_score = \
                get_top_predictions(predicted_decision)

            out = dict()
            # Per-title skill maps, emitted as a list of one-key dicts in
            # sorted title order.
            titles = sorted(skills_map_with_percent.keys())
            skills_map_with_percent_list = [
                {title: skills_map_with_percent[title]} for title in titles]

            out["university"] = university
            out["skills_map"] = skills_map_with_percent_list
            out["titles"] = titles
            out["candidate_skills"] = dict()
            out["title_data"] = dict()

            try:
                tokens = nltk.word_tokenize(resume_raw.lower())
            except UnicodeDecodeError:
                # Python 2 str with non-ASCII bytes: decode, then lower.
                tokens = nltk.word_tokenize(resume_raw.decode('utf-8').lower())
            # Set for O(1) membership tests in the scoring loop below.
            token_set = set(tokens)

            skill_score = []
            for pred in top_predictions:
                try:
                    top15 = skills_map_with_percent[
                        title_title_map[pred]]["skills"][:15]
                except KeyError:
                    top15 = []
                temp_skill_list = [
                    t for t in top15 if len(t) > 1 and t.lower() in token_set]
                out["candidate_skills"][title_title_map[pred]] = \
                    temp_skill_list
                out["title_data"][title_title_map[pred]] = \
                    titles_data[title_title_map[pred]]
                # Fraction of the title's top-15 skills present in the
                # resume, as a 0-100 integer.
                skill_score.append(int(len(temp_skill_list) / 15.0 * 100.0))

            # Blend model score and keyword score.  `/ 2` deliberately
            # keeps the original Python 2 division semantics.
            final_score = [
                sum(pair) / 2
                for pair in zip(normalized_prediction_score, skill_score)]
            sorted_score_indexes = [
                i for i, _ in sorted(
                    enumerate(final_score), key=lambda x: x[1], reverse=True)]
            final_titles_list = [
                title_title_map[top_predictions[s]]
                for s in sorted_score_indexes]
            final_score_sorted = sorted(final_score, reverse=True)

            out["final_prediction_list"] = final_titles_list
            out["final_score_sorted"] = final_score_sorted
            out["tree_json"] = json.dumps(tree_json)
            print(final_titles_list[:5])
            print(final_score_sorted[:5])

            # Clean up the uploaded file and any intermediate text file.
            if os.path.isfile(textfile_name):
                os.remove(textfile_name)
            if os.path.isfile(filename):
                os.remove(filename)

            return json.dumps(OrderedDict(out))
def analyze():
    """Score an uploaded resume against the known job titles.

    Persists the upload, extracts plain text (converting PDFs), detects
    the university, runs the classifier, and merges the model score with
    a keyword-based skill score into a ranked title list, returned as a
    JSON string.  Temporary files are removed before returning.
    """
    global university
    global tree_json

    if request.method:
        upload = request.files['file']
        if upload:
            fname = str(upload.filename)
            ext = fname.rsplit('.', 1)[1]
            stem = fname.rsplit('.', 1)[0]
            upload.save(os.path.join(iHire.config['UPLOAD_FOLDER'], fname))

            if ext == 'pdf':
                # PDFs are flattened to text; UTF-8 non-breaking spaces
                # ('\xc2\xa0') become plain spaces.
                pdf_text = extract_text_from_pdf(fname)
                pdf_text = pdf_text.replace('\xc2\xa0', ' ')
                with open(stem + '.txt', 'wb') as fh:
                    fh.write(pdf_text)
                text_path = stem + '.txt'
            else:
                text_path = fname

            university = extract_univ(
                open(text_path).read(), univ_dict, univ_normalize)
            print(fname)
            tree_json = create_data_for_tree(
                university, "", skills_employer_tree, univ_major_number,
                major_code_lookup, employer_second_degree_tree)

            resume_text = [open(text_path).read()]
            decision = model.decision_function(resume_text)
            top_predictions, normalized_prediction_score = \
                get_top_predictions(decision)

            # One single-key map per title, in sorted title order.
            titles = sorted(skills_map_with_percent.keys())
            per_title_maps = [{t: skills_map_with_percent[t]}
                              for t in titles]

            out = {
                "university": university,
                "skills_map": per_title_maps,
                "titles": titles,
                "candidate_skills": {},
                "title_data": {},
            }

            try:
                tokens = nltk.word_tokenize(resume_text[0].lower())
            except UnicodeDecodeError:
                tokens = nltk.word_tokenize(
                    resume_text[0].decode('utf-8').lower())

            skill_score = []
            for pred in top_predictions:
                try:
                    leading_skills = skills_map_with_percent[
                        title_title_map[pred]]["skills"][:15]
                except KeyError:
                    leading_skills = []
                matched = [s for s in leading_skills
                           if len(s) > 1 and s.lower() in tokens]
                out["candidate_skills"][title_title_map[pred]] = matched
                out["title_data"][title_title_map[pred]] = \
                    titles_data[title_title_map[pred]]
                # Share of the title's top-15 skills found in the resume,
                # scaled to 0-100.
                skill_score.append(int(len(matched) / 15.0 * 100.0))

            # Average the classifier and keyword scores per title.
            blended = [sum(pair) / 2 for pair in
                       zip(normalized_prediction_score, skill_score)]
            ranked_idx = sorted(range(len(blended)),
                                key=lambda i: blended[i], reverse=True)
            ranked_titles = [title_title_map[top_predictions[i]]
                             for i in ranked_idx]
            scores_desc = sorted(blended, reverse=True)

            out["final_prediction_list"] = ranked_titles
            out["final_score_sorted"] = scores_desc
            out["tree_json"] = json.dumps(tree_json)
            print(ranked_titles[:5])
            print(scores_desc[:5])

            if os.path.isfile(text_path):
                os.remove(text_path)
            if os.path.isfile(fname):
                os.remove(fname)

            return json.dumps(OrderedDict(out))