def categorize_resume():
    """Classify every stored resume and persist the resulting category string.

    For each id returned by ``fetch_resume_ids()``:

    1. The resume's skills are fetched and passed through
       ``Normalizer.interpolate_skills``.
    2. The skills are split by ``group_list`` into groups of five and each
       group is classified separately with ``classifier.classify``.
    3. The first row of every classifier output is summed element-wise into
       four running totals.
    4. ``get_max`` picks an index from those totals (presumably the largest
       one -- confirm against its definition) and ``get_job_description``
       turns that index into the category text.
    5. Each total's percentage share of the overall sum is appended to the
       category text, comma separated, and the result is stored with
       ``update_resume_category``.

    A resume that yields no groups, or whose totals sum to zero, gets a share
    of ``0.0`` for every category instead of triggering a division by zero.

    Returns:
        list: one ``"<category>,<pct>,<pct>,<pct>,<pct>"`` string per resume
        id, in the order the ids were fetched.
    """
    num_categories = 4
    group_size = 5

    resume_ids = fetch_resume_ids()
    normalizer = Normalizer()
    categories = []
    print(resume_ids)

    for resume_id in resume_ids:
        skills = fetch_skills(resume_id)
        stored_category = fetch_category(resume_id)
        print('category from db....')
        print(stored_category)
        print(skills)

        skills = normalizer.interpolate_skills(skills)
        scores = [0] * num_categories

        for group in group_list(skills, group_size):
            # The classifier is fed a batch containing just this one group.
            outputs = classifier.classify(numpy.array([group]))
            group_category = get_job_description(numpy.argmax(outputs))
            print('\n')
            print(outputs)
            print(group_category)

            # Accumulate the single result row. The modulus keeps the
            # original wrap-around: an output longer than four entries
            # folds back onto the first slots instead of overflowing.
            for position, score in enumerate(outputs[0]):
                scores[position % num_categories] += score

        category = get_job_description(get_max(scores))

        # Convert the totals into percentage shares and append them to the
        # category string.
        total = sum(scores)
        if total == 0:
            # No groups were classified (or every score was zero); there is
            # nothing to normalise, so report a zero share for each category.
            percentages = [0.0] * num_categories
        else:
            percentages = [(score / total) * 100 for score in scores]
        category = ','.join([category] + [str(share) for share in percentages])

        categories.append(category)
        update_resume_category(category, resume_id)

        print('\n')
        print(scores)
        print('\nFinal-Output: ' + category)
        print('\n****\n')

    return categories