예제 #1
0
            sql = "UPDATE tj_news_clob SET news_group=" + str(
                group) + " WHERE  news_id='" + newsid + "'"
            cursor.execute(sql)
            if y_pred[i] in countdict.keys():
                countdict[y_pred[i]] = countdict[y_pred[i]] + 1
            else:
                countdict[y_pred[i]] = 1
        print('修改新闻类别成功')
        print('km.cluster_centers_.shape[0]:', km.cluster_centers_.shape[0])
        for k in range(km.cluster_centers_.shape[0]):
            sql1 = "INSERT INTO tj_cluster_centers (group_id, wv,count) VALUES (" + str(
                k) + ", '" + str(km.cluster_centers_[k]) + "'," + str(
                    countdict[k]) + ")"
            cursor.execute(sql1)
        db.close()
        print("修改类别成功")


# Driver script: build word vectors for the news corpus, cluster them with
# k-means, and report the wall-clock start/end timestamps.
import sys  # local import: only needed for the print-threshold setting below

# Print NumPy arrays in full (no "..." summarisation). The clustering code
# above stores str(km.cluster_centers_[k]) in the database, so a truncated
# repr would silently lose data.
# np.NaN is not usable here: NumPy rejects a NaN threshold with ValueError and
# NumPy 2.0 removed the np.NaN alias altogether; sys.maxsize is the documented
# value for an untruncated representation.
np.set_printoptions(threshold=sys.maxsize)
start = DateUtil.nowToStrNormal()
print(start)
c = Cluster(stopword_path='hlt_stop_words.txt')
# Word segmentation (step currently disabled)
# c.wordcut()
# Word vectors
newsids, tdm = c.wordvector(sql=False)
print('tdm:', tdm.shape)
# k-means clustering
# NOTE(review): the meaning of the 200 and 400 arguments is defined by
# Cluster.dokmeans, whose signature is not visible here — confirm before tuning.
c.dokmeans(newsids, tdm, 200, 400)
print("start:", start, "  end:", DateUtil.nowToStrNormal())