def textsegment():
    """Segment the text of every stored Weibo post with jieba and persist it.

    Reads each record's text, runs search-engine-mode segmentation, and
    writes the space-joined tokens back via ``writeWeiboData_ByNLP``.
    """
    total = readWeiboData.count()[0]
    # NOTE(review): range(1, total) never processes record `total` — confirm
    # IDs are 1-based and whether the last row is intentionally skipped.
    for row_id in range(1, total):
        raw_text = readWeiboData.getText(row_id)
        segmented = " ".join(jieba.cut_for_search(raw_text))
        writeWeiboData_ByNLP.textWriteToSql(segmented, row_id)
def frequency():
    """Count word frequencies across all stored Weibo posts.

    Returns:
        A mapping (``collections.Counter``, a dict subclass — backward
        compatible with the previous plain dict) from UTF-8 encoded word
        to its occurrence count over every record's text.
    """
    # Local import: no top-of-file import block is visible in this chunk.
    from collections import Counter

    counts = Counter()
    total = readWeiboData.count()[0]
    # NOTE(review): range(1, total) skips record `total` — confirm IDs are
    # 1-based and whether the last row is intentionally excluded.
    for row_id in range(1, total):
        text = readWeiboData.getText(row_id)
        # Counter.update replaces the hand-rolled `dict.get(w, 0) + 1` loop.
        counts.update(word.encode('utf8') for word in jieba.cut(text))
    return counts
def keywords():
    """Extract the top-3 keywords of every stored Weibo post and persist them.

    For each record, runs SnowNLP keyword extraction once and writes the
    keywords (each followed by a single space, matching the original output
    format exactly) back via ``writeWeiboData``.
    """
    total = readWeiboData.count()[0]
    # NOTE(review): range(1, total) skips record `total` — confirm IDs are
    # 1-based and whether the last row is intentionally excluded.
    for row_id in range(1, total):
        raw_text = readWeiboData.getText(row_id)
        snow = SnowNLP(str(raw_text).decode('utf8'))
        # Hoisted: the original re-ran snow.keywords(3) in the loop bound AND
        # once per index, repeating the full keyword extraction each time.
        top_words = snow.keywords(3)
        # "word " per keyword, identical to the original += concatenation
        # (trailing space included, empty string when no keywords).
        texttosql = "".join(word + " " for word in top_words)
        writeWeiboData.keywordsWriteToSql(texttosql, row_id)