示例#1
0
文件: NLP.py 项目: hx-Tang/py-web
 def comment_Analysis(self, commmentlist):
     """Return the mean sentiment score of the given comments.

     Each comment is scored with ``xm.sentiment`` and the scores are
     averaged.  Per the original author's note the score tops out at 1.0,
     and the lower it is, the more negative the comment.

     Args:
         commmentlist: Sized collection of comment texts to score.

     Returns:
         The arithmetic mean of the per-comment scores, or ``0.0`` when
         ``commmentlist`` is empty.  The empty case used to raise
         ``ZeroDivisionError``; ``0.0`` there is a placeholder for "nothing
         to score", not a measured sentiment.
     """
     # Guard the division below: with no comments there is nothing to
     # average, and no need to touch the stop-word configuration either.
     if len(commmentlist) == 0:
         return 0.0
     xm.set_stopword('手机关键词停词表.txt')
     totalscore = sum(xm.sentiment(comment) for comment in commmentlist)
     return totalscore / len(commmentlist)
示例#2
0
文件: NLP.py 项目: hx-Tang/py-web
    def keyword_extract(self, commentlist):
        """Extract keywords from the comments, keeping at most the first 40.

        The comments are processed in order and the entries returned by
        ``xm.keyword`` are collected in that same order.  Only the first
        element of each entry is kept (presumably the keyword text of a
        ``(word, weight)`` pair -- confirm against the xmnlp version in use).

        Args:
            commentlist: Iterable of comment texts.

        Returns:
            A list of at most 40 keywords; empty when ``commentlist`` is
            empty or no keywords are found.
        """
        max_keywords = 40
        xm.set_stopword('手机关键词停词表.txt')
        collected = []
        for comment in commentlist:
            # extend() appends in place; the former ``a = a + b`` copied the
            # whole accumulated list on every iteration.
            collected.extend(xm.keyword(comment))
            # Anything past the first ``max_keywords`` entries is discarded
            # below, so stop analysing further comments once we have enough.
            if len(collected) >= max_keywords:
                break
        return [entry[0] for entry in collected[:max_keywords]]
示例#3
0
#!/usr/bin/env python
# coding=utf-8

import xmnlp
# Configure the stop-word list once, at import time, before parse_string()
# is defined or called.
# NOTE(review): the path is absolute and machine-specific (/home/ubuntu/...);
# confirm it exists wherever this module is deployed.
xmnlp.set_stopword('/home/ubuntu/downloads/simhash/dict/stop_words.utf8')


def parse_string(str):
    """Return the key phrases of ``str`` as one comma-terminated string.

    Every item produced by ``xmnlp.keyphrase`` is flattened with ``''.join``
    and followed by a comma, so a non-empty result always ends with ``,``.
    An input that yields no key phrases gives the empty string.
    """
    # NOTE: the parameter shadows the builtin ``str``; the name is kept
    # because it is part of the signature callers may pass by keyword.
    pieces = [''.join(phrase) + ',' for phrase in xmnlp.keyphrase(str)]
    return ''.join(pieces)
示例#4
0
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE."""

import sys
# Put the parent directory on the module search path.
# NOTE(review): presumably so the in-repo xmnlp package can be imported when
# this example is run from its own directory -- confirm.
sys.path.append("..")

# Python 2 only: switch the interpreter's default string encoding to UTF-8.
if sys.version_info[0] == 2:
    # reload() brings back sys.setdefaultencoding, which Python 2 removes
    # from the sys module during startup.
    reload(sys)
    sys.setdefaultencoding('utf8')

import xmnlp
# Register the user dictionary and the stop-word list; both are given as
# relative paths.
xmnlp.set_userdict('./userdict.txt')
xmnlp.set_stopword('./stopword.txt')

doc = """自然语言处理: 是人工智能和语言学领域的分支学科。
在这此领域中探讨如何处理及运用自然语言;自然语言认知则是指让电脑“懂”人类的语言。 
自然语言生成系统把计算机数据转化为自然语言。自然语言理解系统把自然语言转化为计算机程序更易于处理的形式。"""

# Tag the document once and reuse the (word, tag) pairs for both outputs.
pairs = list(xmnlp.tag(doc, hmm=True))

# One "word tag" entry per token, printed on a single line.
out = [word + ' ' + tag for word, tag in pairs]
print(' / '.join(out))
print()

# Legend: every distinct tag together with its description from tag_mean.
tagset = {tag for _, tag in pairs}
for tag in tagset:
    print(tag, ':', xmnlp.tag_mean(tag))
示例#5
0
    # 获取文本和停用词
    doc = get_doc(text_file)
    stopWords = get_text(file_name=stopwords_file)
    # print(doc)

    # 切分成句子
    sents = doc2sent(doc)
    # print(sents)

    # 摘要提取
    word_list = [segment_jieba(sent, stopWords) for sent in sents]
    # print(word_list)

    rank = textrank.TextRank(word_list)
    rank.solve()
    # key_sents = [sents[index] for index in rank.top_index(5)]
    # print(key_sents)
    for index in rank.top_index(5):
        print(sents[index])

    # keyword_rank = textrank.KeywordTextRank(word_list)
    # keyword_rank.solve()
    # for w in keyword_rank.top_index(10):
    #     print(w)

    # xmnlp摘要提取
    xmnlp.set_stopword('/home/kdd/nlp/stop_words.txt')
    xmnlp.set_userdict('/home/kdd/nlp/userdict.txt')
    t = xmnlp.keyphrase(doc)
    print(t)