Пример #1
0
 def get_discipline(major_phrase,
                    prefer_index,
                    discipline_phrase_dic,
                    debug=False):
     """Return the longest discipline phrase matched in the given majors.

     The major at ``prefer_index`` is searched first. If it yields no match,
     all entries of ``major_phrase`` are joined with spaces and searched
     again.

     Args:
         major_phrase: Sequence of major strings.
         prefer_index: Index into ``major_phrase`` of the major to try first.
         discipline_phrase_dic: Dictionary handed to
             ``TextHelper.get_dict_pattern`` for matching.
         debug: When true, print a message for each lookup that finds
             nothing.

     Returns:
         The longest matched key, or ``None`` when neither lookup matches.
     """

     def _match(text):
         # Same pipeline as every lookup here: ASCII conversion, then
         # normalization, then dictionary matching.
         cleaned = SegmentHelper.normalize(TextHelper.unicode_to_ascii(text))
         return TextHelper.get_dict_pattern(cleaned, discipline_phrase_dic)

     candidates = _match(major_phrase[prefer_index])
     if len(candidates) == 0:
         if debug:
             print("prefer major can not found match phrase in dict: %s" %
                   major_phrase[prefer_index])
         # Fall back to searching across all majors at once.
         candidates = _match(' '.join(major_phrase))
         if len(candidates) == 0:
             if debug:
                 print("Can not found major words: %s" % str(major_phrase))
             return None
     # max() returns the first of several equally long keys, i.e. the first
     # key in iteration order whose length is maximal.
     return max(candidates, key=len)
Пример #2
0
 def get_highest_education(profile, education_phrase_dic,
                           discipline_phrase_dic):
     """Return the highest degree found in a profile and its discipline.

     Every entry of ``profile['education']`` is converted to ASCII,
     normalized and matched against ``education_phrase_dic``. The first
     matched key of each entry is remembered together with the entry's
     position; if several entries yield the same key, the last position wins.
     Degrees are ranked Doctor > Master > Bachelor.

     Args:
         profile: Mapping with an ``'education'`` sequence and a ``'major'``
             sequence. The education position is passed to
             ``ProfileHelper.get_discipline`` as the preferred index into
             ``profile['major']``.
         education_phrase_dic: Dictionary used to match education phrases.
         discipline_phrase_dic: Dictionary used to match discipline phrases.

     Returns:
         A ``(degrees, disciplines)`` pair of one-element lists for the
         highest ranked degree present, or ``([], [])`` when none of the
         three ranked degrees was matched.
     """
     degree_to_index = {}
     for index, raw_education in enumerate(profile['education']):
         education = SegmentHelper.normalize(
             TextHelper.unicode_to_ascii(raw_education))
         matches = TextHelper.get_dict_pattern(education,
                                               education_phrase_dic)
         if len(matches) > 0:
             # next(iter(...)) takes the first matched key on any Python
             # version; ``matches.keys()[0]`` raises TypeError on Python 3
             # because keys() is a view that cannot be subscripted.
             degree_to_index[next(iter(matches))] = index

     # Check degrees from highest to lowest and stop at the first one found.
     for degree in ('Doctor', 'Master', 'Bachelor'):
         if degree in degree_to_index:
             return [degree], [
                 ProfileHelper.get_discipline(profile['major'],
                                              degree_to_index[degree],
                                              discipline_phrase_dic)
             ]
     return [], []
Пример #3
0
 def _get_education_words(self, education_dict):
     """Return education phrases matched in ``self.raw_position``.

     When nothing matches, "Bachelor" is used as the default requirement: it
     is appended to ``self.new_words_list`` and returned as
     ``{"Bachelor": 1}``.
     """
     matched = TextHelper.get_dict_pattern(self.raw_position,
                                           education_dict)
     if len(matched) != 0:
         return matched
     # No education phrase found: record and return the default.
     fallback = "Bachelor"
     self.new_words_list.append(fallback)
     return {fallback: 1}
Пример #4
0
 def get_skills(profile, skills_dic, debug=False):
     """Return the skill phrases matched in a profile's skills.

     The entries of ``profile['skills']`` are joined with spaces, converted
     to ASCII, normalized and matched against ``skills_dic``.

     Args:
         profile: Mapping with a ``'skills'`` sequence of strings.
         skills_dic: Dictionary handed to ``TextHelper.get_dict_pattern``.
         debug: When true, print the normalized skills text.

     Returns:
         A list of the matched keys; empty when nothing matches.
     """
     skill_text = SegmentHelper.normalize(
         TextHelper.unicode_to_ascii(' '.join(profile['skills'])))
     if debug:
         print("right after normalize: %s" % skill_text)
     matched = TextHelper.get_dict_pattern(skill_text, skills_dic)
     # Always return a real list: on Python 3 ``keys()`` is a view, which
     # made the return type differ from the ``[]`` of the no-match case.
     return list(matched)
Пример #5
0
 def _get_responsibility_words(self, education_dict):
     """Return the phrases from the given dictionary found in ``self.raw_position``.

     NOTE(review): the parameter is named ``education_dict`` although the
     method name refers to responsibilities; presumably callers pass a
     responsibility dictionary -- confirm against the call sites.
     """
     responsibility_phrases = TextHelper.get_dict_pattern(
         self.raw_position, education_dict)
     return responsibility_phrases
Пример #6
0
 def _get_discipline_words(self, discipline_dict):
     """Return the ``discipline_dict`` phrases found in ``self.raw_position``."""
     return TextHelper.get_dict_pattern(self.raw_position, discipline_dict)
Пример #7
0
 def _get_skill_words(self, skill_dict):
     """Return the ``skill_dict`` phrases found in ``self.raw_position``."""
     skill_phrases = TextHelper.get_dict_pattern(self.raw_position,
                                                 skill_dict)
     return skill_phrases