Exemplo n.º 1
0
def main():
	"""Run a 5-fold cross-validated evaluation of the item-based kNN model.

	Steps, in order:
	  1. Load the item-indexed data and build an ``ItemBase`` on top of it;
	     attach the user-indexed data from a second ``Loader``.
	  2. Restore the mean and cosine-similarity caches from ``temp/item_base``
	     or, when loading fails for any reason, recompute and re-pickle them.
	  3. For every neighbourhood size in ``k_values``, run ``knn()`` on each
	     fold and average ``test_error`` / ``test_topN`` over the folds.

	The per-k averages are printed as they are produced and the collected
	``results`` dict is printed at the end. Nothing is returned.
	"""
	# Single source of truth for the fold count (used by KFold and by the
	# averaging below, which previously repeated the literal 5).
	n_folds = 5

	loader = Loader(FILE_PATH, PREDICT_PATH)
	loader.load_item_base()

	item_base = ItemBase(loader, 5)
	loader2 = Loader(FILE_PATH, PREDICT_PATH)
	loader2.load_user_base()
	item_base.user_data = loader2.data

	# --- cached means -----------------------------------------------------
	# NOTE(review): `item_base.mean` is used both as a method (called in the
	# except branch) and as the attribute holding the cached value. This only
	# works if ItemBase.mean() rebinds `self.mean` to its result -- confirm
	# against the ItemBase class; behaviour is left exactly as it was.
	mean_cache_path = 'temp/item_base/user_mean.p'
	try:
		# `with` guarantees the handle is closed even if unpickling fails.
		with open(mean_cache_path, 'rb') as cache_file:
			item_base.mean = pickle.load(cache_file)
	except Exception:
		# Any failure (missing file, truncated/corrupt pickle, ...) falls
		# back to recomputing and rewriting the cache.
		item_base.mean()
		with open(mean_cache_path, 'wb') as cache_file:
			pickle.dump(item_base.mean, cache_file)

	# --- cached similarity matrix ------------------------------------------
	cosine_cache_path = 'temp/item_base/ad_cosine.matrix'
	try:
		with open(cosine_cache_path, 'rb') as cache_file:
			item_base.cosine = pickle.load(cache_file)
	except Exception:
		# similarities() is expected to fill `item_base.cosine`, since that
		# is the attribute pickled right after -- TODO confirm in ItemBase.
		item_base.similarities()
		with open(cosine_cache_path, 'wb') as cache_file:
			pickle.dump(item_base.cosine, cache_file)

	# Single-argument parenthesised prints behave identically on Python 2
	# (with or without print_function) and are also valid on Python 3.
	print('> Testing model')
	k_values = [5, 10, 20, 30]
	results = {'RMSE': [], 'Precision': [], 'K': k_values}

	# Folds are index splits over the number of entries in loader.items.
	kf = cross_validation.KFold(len(item_base.loader.items), n_folds=n_folds)
	for k in k_values:
		item_base.k = k
		rmse_total = 0.
		precision_total = 0.
		for train_index, test_index in kf:
			item_base.train_idx = train_index
			item_base.test_idx = test_index
			item_base.knn()
			rmse_total += item_base.test_error(test_index)
			precision_total += item_base.test_topN(test_index)

		mean_rmse = rmse_total / n_folds
		mean_precision = precision_total / n_folds
		print(str.format('RMSE [k={0}] = {1}', k, mean_rmse))
		print(str.format('PRECISION@10 [k={0}] = {1}', k, mean_precision))
		results['RMSE'].append(mean_rmse)
		results['Precision'].append(mean_precision)
	print(results)
Exemplo n.º 2
0
			predictions = sorted(predictions, key=operator.itemgetter(1))
			predictions.reverse()
			top10[user] = predictions[0:10]
		file = open("temp/slope_one/top10.txt", "wb")
		for user,items in top10.items():
			file.write(str.format("{0}\n", user))
			for data in items:
				file.write(str.format('\t"{0}"\n', data[0]))

if __name__ == '__main__':

	# Build the slope-one model on the user-indexed data, then attach the
	# item-indexed data obtained from a second, independent Loader.
	loader = Loader(FILE_PATH, PREDICT_PATH)
	loader.load_user_base()
	slope_one = SlopeOne(loader)
	loader2 = Loader(FILE_PATH, PREDICT_PATH)
	loader2.load_item_base()
	slope_one.item_data = loader2.data

	# Restore the cached averages; on any failure (missing or unreadable
	# cache) recompute them and rewrite the cache file.
	mean_cache_path = 'temp/slope_one/mean.p'
	try:
		# `with` closes the handle even when unpickling raises.
		with open(mean_cache_path, 'rb') as cache_file:
			slope_one.avg = pickle.load(cache_file)
	except Exception:
		# mean() is expected to populate `slope_one.avg`, which is what gets
		# pickled next -- TODO confirm against the SlopeOne class.
		slope_one.mean()
		with open(mean_cache_path, 'wb') as cache_file:
			pickle.dump(slope_one.avg, cache_file)

	try:
		print '> Loading dev matrix'
		f = open('temp/slope_one/freqs.p', 'rb')
		slope_one.freqs = pickle.load(f)
		f.close()
		f = open('temp/slope_one/diffs.p', 'rb')
		slope_one.diffs = pickle.load(f)