Пример #1
0
def predict_with_fixed_value(forward,comment,like,submission=True):
	"""
	Predict the same (forward,comment,like) counts for every record.

	Prints the score that precision() gives this constant prediction on the
	training set and, when submission is true, writes one
	"uid<TAB>mid<TAB>fp,cp,lp" line per test record to
	"weibo_predict_<forward>_<comment>_<like>.txt".

	forward,comment,like:
		int
		the fixed values predicted for every record
	submission:
		bool
		whether to generate the submission file

	Raises TypeError if any of forward,comment,like is not an int.
	"""
	# type check: each of the three values has to be an int, not only forward
	if not all(isinstance(value,int) for value in (forward,comment,like)):
		raise TypeError("forward,comment,like should be type 'int' ")

	traindata,testdata = loadData()

	# score on the training set
	# work on a copy so the prediction columns are not written into a slice of traindata
	train_real_pred = traindata[['forward','comment','like']].copy()
	train_real_pred['fp'],train_real_pred['cp'],train_real_pred['lp'] = forward,comment,like
	print ("Score on the training set:{0:.2f}%".format(precision(train_real_pred.values)*100))

	# predict on the test data with fixed value, generate submission file
	if submission:
		filename = "weibo_predict_{}_{}_{}.txt".format(forward,comment,like)
		# the prediction is constant, so only uid and mid have to be read per record;
		# reading the columns directly keeps the ints from being upcast by row-wise iteration
		result = ["{0}\t{1}\t{2},{3},{4}\n".format(uid,mid,forward,comment,like)
				for uid,mid in zip(testdata['uid'],testdata['mid'])]
		# the with-block closes the file even if writing fails
		with open(filename,'w') as f:
			f.writelines(result)
		print ('generate submission file "{}"'.format(filename))
def predict_with_fixed_value(forward,comment,like,submission=True):
	"""
	Score and optionally export a constant (forward,comment,like) prediction.

	The three given values are used as the prediction for every training
	record (the resulting precision() score is printed as a percentage) and,
	when submission is true, for every test record, which are written as
	"uid<TAB>mid<TAB>fp,cp,lp" lines to
	"weibo_predict_<forward>_<comment>_<like>.txt".

	forward,comment,like:
		int
		the fixed values predicted for every record
	submission:
		bool
		whether to generate the submission file

	Raises TypeError if any of forward,comment,like is not an int.
	"""
	# type check: validate all three arguments (the check used to look at forward only)
	for value in (forward,comment,like):
		if not isinstance(value,int):
			raise TypeError("forward,comment,like should be type 'int' ")

	traindata,testdata = loadData()

	#score on the training set
	# copy the selected columns first so the assignment below does not target a slice of traindata
	train_real_pred = traindata[['forward','comment','like']].copy()
	train_real_pred['fp'],train_real_pred['cp'],train_real_pred['lp'] = forward,comment,like
	# print is written in call form so the line runs on Python 2 and Python 3 alike
	print ("Score on the training set:{0:.2f}%".format(precision(train_real_pred.values)*100))

	#predict on the test data with fixed value, generate submission file
	if submission:
		filename = "weibo_predict_{}_{}_{}.txt".format(forward,comment,like)
		result = []
		# only uid and mid vary per record; the predicted values are the fixed arguments
		for uid,mid in zip(testdata['uid'],testdata['mid']):
			result.append("{0}\t{1}\t{2},{3},{4}\n".format(uid,mid,forward,comment,like))
		# the with-block closes the file even if writing fails
		with open(filename,'w') as f:
			f.writelines(result)
		print ('generate submission file "{}"'.format(filename))
Пример #3
0
def predict_with_stat(stat="median",submission=True):
	"""
	Predict each record's forward/comment/like counts from a per-uid statistic.

	stat:
		string
		min,max,mean,median
		suffix of the "forward_<stat>","comment_<stat>","like_<stat>" entries
		that are read from the genUidStat() result of each uid
	submission:
		bool
		whether to generate the submission file "weibo_predict_<stat>.txt"

	A uid that is missing from the genUidStat() result is predicted as 0,0,0.
	The precision() score on the training set is printed as a percentage.
	"""
	stat_dic = genUidStat()

	# load the data
	traindata,testdata = loadData()

	stat_keys = ("forward_"+stat,"comment_"+stat,"like_"+stat)

	def predict(uids):
		# get stat for each uid: returns the forward,comment,like prediction lists, in uid order
		forward,comment,like = [],[],[]
		for uid in uids:
			if uid in stat_dic:
				uid_stat = stat_dic[uid]
				fp,cp,lp = [int(uid_stat[key]) for key in stat_keys]
			else:
				# uid without statistics: predict zero interactions
				fp,cp,lp = 0,0,0
			forward.append(fp)
			comment.append(cp)
			like.append(lp)
		return forward,comment,like

	# score on the training set
	forward,comment,like = predict(traindata['uid'])
	# work on a copy so the prediction columns are not written into a slice of traindata
	train_real_pred = traindata[['forward','comment','like']].copy()
	train_real_pred['fp'],train_real_pred['cp'],train_real_pred['lp'] = forward,comment,like
	print ("Score on the training set:{0:.2f}%".format(precision(train_real_pred.values)*100))

	# predict on the test data with the per-uid stat, generate submission file
	if submission:
		forward,comment,like = predict(testdata['uid'])
		filename = "weibo_predict_{}.txt".format(stat)
		# reading the columns directly keeps the int predictions from being upcast by row-wise iteration
		result = ["{0}\t{1}\t{2},{3},{4}\n".format(uid,mid,fp,cp,lp)
				for uid,mid,fp,cp,lp in zip(testdata['uid'],testdata['mid'],forward,comment,like)]
		# the with-block closes the file even if writing fails
		with open(filename,'w') as f:
			f.writelines(result)
		print ('generate submission file "{}"'.format(filename))
def predict_with_stat(stat="median",submission=True):
	"""
	Use one per-uid statistic as the forward/comment/like prediction.

	stat:
		string
		min,max,mean,median
		selects the "forward_<stat>","comment_<stat>","like_<stat>" entries
		of each uid in the genUidStat() result
	submission:
		bool
		whether to write the test predictions to "weibo_predict_<stat>.txt"

	Records whose uid is not in the genUidStat() result get 0,0,0.
	The precision() score on the training set is printed as a percentage.
	"""
	stat_dic = genUidStat()
	traindata,testdata = loadData()

	def stat_columns(uids):
		#get stat for each uid, as three parallel lists forward,comment,like
		columns = ([],[],[])
		for uid in uids:
			# "in" replaces dict.has_key(), which no longer exists in Python 3
			if uid in stat_dic:
				values = (stat_dic[uid]["forward_"+stat],
						stat_dic[uid]["comment_"+stat],
						stat_dic[uid]["like_"+stat])
			else:
				values = (0,0,0)
			for column,value in zip(columns,values):
				column.append(int(value))
		return columns

	#score on the training set
	forward,comment,like = stat_columns(traindata['uid'])
	# copy the selected columns first so the assignment below does not target a slice of traindata
	train_real_pred = traindata[['forward','comment','like']].copy()
	train_real_pred['fp'],train_real_pred['cp'],train_real_pred['lp'] = forward,comment,like
	# print is written in call form so the line runs on Python 2 and Python 3 alike
	print ("Score on the training set:{0:.2f}%".format(precision(train_real_pred.values)*100))

	#predict on the test data with the per-uid stat, generate submission file
	if submission:
		forward,comment,like = stat_columns(testdata['uid'])
		filename = "weibo_predict_{}.txt".format(stat)
		result = []
		for uid,mid,fp,cp,lp in zip(testdata['uid'],testdata['mid'],forward,comment,like):
			result.append("{0}\t{1}\t{2},{3},{4}\n".format(uid,mid,fp,cp,lp))
		# the with-block closes the file even if writing fails
		with open(filename,'w') as f:
			f.writelines(result)
		print ('generate submission file "{}"'.format(filename))
Пример #5
0
def search_all_uid():
	"""
	Search the best forward/comment/like prediction for every uid.

	For each uid in the genUidStat() result, search() is submitted to a
	multiprocessing Pool three times -- once per target ('forward',
	'comment','like') -- with that uid's training rows and a tuple of the
	uid's min/median/max statistics of all three targets (converted to int).

	The result is also pickled to 'uid_best_pred.pkl'. Saving is best
	effort: a failure is reported on stdout and the result is still returned.

	Returns a dict mapping each uid to [fp,cp,lp], the three search() results.
	"""
	stat_names = ['forward_min','forward_median','forward_max','comment_min',
				'comment_median','comment_max','like_min','like_median','like_max']
	targets = ('forward','comment','like')

	traindata,_ = loadData()
	stat_dic = genUidStat()

	#multiprocessing version for getting uid_best_pred
	pool = Pool()
	pending = []
	try:
		for uid in stat_dic:
			print ("search uid:{}".format(uid))
			uid_data = traindata[traindata.uid == uid]
			arguments = tuple([int(i) for i in stat_dic[uid][stat_names]])
			# one asynchronous search per target, kept together with its uid
			tasks = [pool.apply_async(search,args=(uid_data,target,arguments)) for target in targets]
			pending.append((uid,tasks))
	finally:
		# always release the worker processes, even if submitting a task failed
		pool.close()
		pool.join()

	uid_best_pred = {}
	for uid,tasks in pending:
		# get() re-raises any exception that happened inside the worker
		uid_best_pred[uid] = [task.get() for task in tasks]

	try:
		# pickle output is binary data, so the file is opened in 'wb' mode;
		# the with-block closes it even if dumping fails
		with open('uid_best_pred.pkl','wb') as f:
			cPickle.dump(uid_best_pred,f)
	except Exception as err:
		# saving is only a cache: report the problem instead of hiding it, then carry on
		print ('failed to save "uid_best_pred.pkl": {}'.format(err))

	return uid_best_pred
Пример #6
0
def predict_by_search(submission=True):
	"""
	Predict forward/comment/like with the per-uid values found by search_all_uid().

	submission:
		bool
		whether to write the test predictions to "weibo_predict_search.txt"

	Records whose uid is not in the search_all_uid() result get 0,0,0.
	The precision() score on the training set is printed as a percentage.
	"""
	traindata,testdata = loadData()
	uid_best_pred = search_all_uid()
	# print is written in call form so the lines run on Python 2 and Python 3 alike
	print ("search done,now predict on traindata and testdata...")

	def predict(uids):
		#predict with uid's best fp,cp,lp; returns the forward,comment,like lists in uid order
		forward,comment,like = [],[],[]
		for uid in uids:
			# "in" replaces dict.has_key(), which no longer exists in Python 3
			if uid in uid_best_pred:
				best = uid_best_pred[uid]
				fp,cp,lp = int(best[0]),int(best[1]),int(best[2])
			else:
				# uid without a search result: predict zero interactions
				fp,cp,lp = 0,0,0
			forward.append(fp)
			comment.append(cp)
			like.append(lp)
		return forward,comment,like

	#score on the traindata
	forward,comment,like = predict(traindata['uid'])
	# copy the selected columns first so the assignment below does not target a slice of traindata
	train_real_pred = traindata[['forward','comment','like']].copy()
	train_real_pred['fp'],train_real_pred['cp'],train_real_pred['lp'] = forward,comment,like
	print ("Score on the training set:{0:.2f}%".format(precision(train_real_pred.values)*100))

	if submission:
		forward,comment,like = predict(testdata['uid'])

		#generate submission file
		filename = "weibo_predict_search.txt"
		# reading the columns directly keeps the int predictions from being upcast by row-wise iteration
		result = ["{0}\t{1}\t{2},{3},{4}\n".format(uid,mid,fp,cp,lp)
				for uid,mid,fp,cp,lp in zip(testdata['uid'],testdata['mid'],forward,comment,like)]
		# the with-block closes the file even if writing fails
		with open(filename,'w') as f:
			f.writelines(result)
		print ('generate submission file "{}"'.format(filename))