Пример #1
0
    def get_clean_keywords_list(self, unique=True):
        """
        @summary: Build the normalized keyword list(s) for ``self.keywords``.

        User-entered keywords may contain Chinese punctuation and similar
        noise, so the text is first passed through ``uniform`` (per the
        original note this unifies it to half-width form) and every kept
        token is lowercased. The text is then split in two ways:
          1. natural word segmentation via ``jieba.cut``;
          2. the user's own spacing via ``str.split``.

        @param unique: when true, both token lists are passed through
            ``get_ordered_unique`` before being compared and returned.
        @return: a one-element list ``[tokens]`` when both splits produce
            equal results, otherwise ``[jieba_tokens, spacing_tokens]``.
        """
        normalized = uniform(self.keywords)

        def _keep_and_lower(tokens):
            # Discard tokens flagged by is_other, lowercase the rest.
            kept = []
            for token in tokens:
                if is_other(token):
                    continue
                kept.append(token.lower())
            return kept

        by_segmentation = _keep_and_lower(jieba.cut(normalized))
        by_spacing = _keep_and_lower(normalized.split())

        if unique:
            by_segmentation = get_ordered_unique(by_segmentation)
            by_spacing = get_ordered_unique(by_spacing)

        # Collapse to a single entry when both strategies agree.
        if by_segmentation == by_spacing:
            return [by_segmentation]
        return [by_segmentation, by_spacing]
Пример #2
0
    def get_clean_keywords_list(self, unique=True):
        """
        @summary: Return the cleaned keyword list(s) derived from
        ``self.keywords``.

        The raw keywords may contain Chinese punctuation and the like, so
        they are run through ``uniform`` first (per the original note this
        unifies them to half-width form); kept tokens are lowercased.
        Two splits of the normalized text are produced:
          1. natural word segmentation (``jieba.cut``);
          2. the spacing the user typed (``str.split``).

        @param unique: when true, each token list goes through
            ``get_ordered_unique`` before comparison.
        @return: ``[tokens]`` if both splits are equal, otherwise
            ``[segmented_tokens, spaced_tokens]``.
        """
        normalized = uniform(self.keywords)
        # A replace_punctuation pass over the normalized text is not applied
        # (it was already commented out in the original code).

        # Split 1: natural word segmentation.
        segmented_tokens = []
        for token in jieba.cut(normalized):
            if is_other(token):
                continue
            segmented_tokens.append(token.lower())

        # Split 2: whitespace boundaries as typed by the user.
        spaced_tokens = []
        for token in normalized.split():
            if is_other(token):
                continue
            spaced_tokens.append(token.lower())

        if unique:
            segmented_tokens = get_ordered_unique(segmented_tokens)
            spaced_tokens = get_ordered_unique(spaced_tokens)

        # Identical splits are reported once rather than twice.
        if segmented_tokens == spaced_tokens:
            return [segmented_tokens]
        return [segmented_tokens, spaced_tokens]
Пример #3
0
    def get_all_eng_words(self, unique=True):
        """
        @summary: Collect the tokens of the keywords and the job description
        that ``is_num_word`` accepts (per the original note: English words
        or word+digit tokens such as "cocos2d").

        @param unique: when true, the result is passed through
            ``get_ordered_unique`` before being returned.
        @return: the matching tokens, in the order ``jieba.cut`` yields them.
        """
        raw_text = self.keywords + ' ' + self.job_desc
        normalized = uniform(raw_text)

        matched = []
        for token in jieba.cut(normalized):
            if is_num_word(token):
                matched.append(token)

        if not unique:
            return matched
        return get_ordered_unique(matched)
Пример #4
0
    def get_all_eng_words(self, unique=True):
        """
        @summary: Gather every token from the keywords plus the job
        description that passes ``is_num_word`` (per the original note:
        English words or word+digit tokens, e.g. "cocos2d").

        @param unique: when true, ``get_ordered_unique`` is applied to the
            collected tokens.
        @return: the collected tokens.
        """
        # Keywords and description are joined with a single space so that
        # both are segmented in one pass.
        combined = self.keywords + ' ' + self.job_desc
        segments = jieba.cut(uniform(combined))

        found = []
        for segment in segments:
            if not is_num_word(segment):
                continue
            found.append(segment)

        if unique:
            return get_ordered_unique(found)
        return found
Пример #5
0
 def get_uniform_keywords(self):
     """Return ``self.keywords`` after passing it through ``uniform``."""
     normalized = uniform(self.keywords)
     return normalized
Пример #6
0
 def get_uniform_keywords(self):
     """Apply ``uniform`` to ``self.keywords`` and return the result."""
     raw_keywords = self.keywords
     return uniform(raw_keywords)