Exemplo n.º 1
0
	def __init__(self):
		'''
		Create the database helper and the feature extractor, then load the
		favorite-entry records and the entry ids through that helper.
		'''
		# Word2Vec feature extraction is switched off; only Gallimaufry is wired in.
		helper = DBHelper()
		self.db = helper
		self.gally = Gallimaufry()
		# Both lookups go through the same helper object that is stored on self.db.
		self.favors = helper.get_all_favor_entrys()
		self.entryids = helper.get_entryids()
Exemplo n.º 2
0
class RecommenderBase(object):
	'''
	Base class for recommendation algorithms.

	Subclasses are expected to override ``train`` and ``recommend``; this class
	supplies the shared construction of training and test feature matrices.
	'''

	def __init__(self):
		'''
		Create the database helper and the feature extractor, then load the
		favorite-entry records and the entry ids from the database helper.
		'''
		self.db = DBHelper()
		# Word2Vec features are disabled; only the Gallimaufry extractor is used.
		self.gally = Gallimaufry()
		# Accessed elsewhere in this class as favors[uid][entryid] -> indexable
		# sequence of timestamps.
		self.favors = self.db.get_all_favor_entrys()
		self.entryids = self.db.get_entryids()

	def recommend(self, uid):
		'''
		Recommend entries for the given user.
		-----------------------------
		uid: ID of the user to recommend for
		-----------------------------
		return: [entryid,...]
		raises: NotImplementedError (must be overridden by subclasses)
		'''
		raise NotImplementedError()

	def train(self):
		'''
		Train the model.
		-----------------------------
		raises: NotImplementedError (must be overridden by subclasses)
		'''
		raise NotImplementedError()

	def load_trainset(self):
		'''
		Build the training set: one sample per (user, entry) pair.
		----------------------------
		return: (features, results)
		        features: float16 ndarray, shape (len(uids) * len(entryids), featuresize)
		        results:  ndarray of labels, 0 = not accepted, 1 = accepted
		'''
		uids = self.db.get_uids()
		trainsetsize = len(uids) * len(self.entryids)
		featuresize = self._get_featuresize()
		# One 2-D allocation instead of a list of per-row arrays; this also keeps
		# the (0, featuresize) shape when there are no samples.
		# NOTE(review): the training set is float16 while load_testset uses
		# float32 -- presumably a memory trade-off; confirm it is intentional.
		trainset = np.empty((trainsetsize, featuresize), dtype=np.float16)
		results = np.zeros(trainsetsize)
		index = 0
		for uid in uids:
			# Progress output; the parenthesised form prints the same text on
			# Python 2 and is also valid on Python 3.
			print(uid)
			for entryid in self.entryids:
				# trainset[index] is a view, so the row is filled in place.
				self._construct_features(uid, entryid, trainset[index], train=True)
				results[index] = self._receive_suggestion(uid, entryid, config.trainset_timespan)
				index += 1
		return trainset, results

	def load_testset(self, uid):
		'''
		Build the test data for the given user: one sample per entry.
		------------------------------
		uid: ID of the user to build the test set for
		------------------------------
		return: (features, results)
		        features: float32 ndarray, shape (len(entryids), featuresize)
		        results:  [(entryid, result),...] with result 0 or 1
		'''
		# Computed once; the feature size does not change between rows.
		featuresize = self._get_featuresize()
		testset = np.empty((len(self.entryids), featuresize), dtype=np.float32)
		results = []
		for index, entryid in enumerate(self.entryids):
			# testset[index] is a view, so the row is filled in place.
			self._construct_features(uid, entryid, testset[index], train=False)
			receive = self._receive_suggestion(uid, entryid, config.testset_timespan)
			results.append((entryid, receive))
		return testset, results

	def _receive_suggestion(self, uid, entryid, timespan):
		'''
		Tell whether the user favored the entry inside the given time span.
		--------------------------------------------
		uid: user ID
		entryid: entry ID
		timespan: indexable pair, timespan[0] = start and timespan[1] = end (both inclusive)
		--------------------------------------------
		return: 0 -> not accepted, 1 -> accepted
		'''
		if uid not in self.favors or entryid not in self.favors[uid]:
			return 0
		times = self.favors[uid][entryid]
		# An empty record means "never favored"; without this guard times[0]
		# below would raise IndexError.
		if len(times) == 0:
			return 0
		start, end = timespan[0], timespan[1]
		# Shortcut: the whole record lies outside the span.
		# NOTE(review): this is only valid if times is sorted ascending --
		# presumably guaranteed by DBHelper; verify.
		if times[0] > end or times[-1] < start:
			return 0
		return 1 if any(start <= time <= end for time in times) else 0

	def _construct_features(self, uid, entryid, features, train=False):
		'''
		Fill ``features`` in place with the values produced by the feature extractor.
		----------------------------------
		uid: user ID
		entryid: entry ID
		features: pre-allocated, index-assignable buffer (e.g. an ndarray row) receiving the values
		train: whether the features are for a training sample (forwarded to the extractor)
		'''
		gallyfeatures = self.gally.extract(uid, entryid, train)
		for index, value in enumerate(gallyfeatures):
			features[index] = value

	def _get_featuresize(self):
		'''
		Get the dimensionality of the feature space.
		-------------------------
		return: feature size
		'''
		# Kept as a running sum so further extractors can add their own size.
		featuresize = 0
		featuresize += self.gally.featuresize()
		return featuresize