Exemplo n.º 1
0
	def load_blogs(self):
		'''
		读取微博数据,为训练word2vec做准备
		'''
		self.load_stopwords()
		filepath = 'blogs.txt'
		dbhelper = DBHelper()
		i = 0
		size = 100000
		index = 0
		with open(filepath, 'a') as writer:
			while index < 227:				
				blogs = dbhelper.select('SELECT mc,rmc FROM microblog ORDER BY blog_id DESC LIMIT %s,%s' % (index*size, (index+1)*size))
				if not blogs:
					break
				for blog in blogs:
					print i
					i += 1
					msg = ''
					if blog[1]:
						msg = blog[1]
					elif blog[0]:
						msg = blog[0]
					tokens = jieba.cut(msg)
					tokens = [token for token in list(tokens) if token not in self.stopwords]
					if len(tokens) >= 5:
						writer.write(' '.join(tokens) + '\n')
				index += 1
Exemplo n.º 2
0
def expand_entry():
	'''
	扩展词条的背景材料
	'''
	dbhelper = DBHelper()
	searcher = Searcher()
	entrys = dbhelper.select("SELECT entryid,name,category FROM entry WHERE background IS NULL")
	for entry in entrys:
		print entry[1], entry[2]
		searcher.expand(entry[0], entry[1], entry[2])
Exemplo n.º 3
0
def recommend(filepath):
	'''
	为所有用户进行推荐,并保存计算结果
	-------------------------------
	filepath: 保存推荐结果的文件路径
	-------------------------------
	return: {uid:[entryid,...],...}
	'''
	predictions = {}
	dbhelper = DBHelper()
	uids = dbhelper.get_uids()
	recommender = LogitRegRecommender()
	for uid in uids:
		print 'predict', uid
		result = recommender.recommend(uid)
		predictions[uid] = result
	evaluator = Evaluator()
	precision,recall,F1 = evaluator.evaluate(predictions, filepath)
	print precision,recall,F1