def predict_with_fixed_value(forward, comment, like, submission=True):
    """Predict every post's forward/comment/like counts with one fixed value each.

    Scores the fixed prediction against the training set, and when
    *submission* is True writes a tab-separated submission file named
    ``weibo_predict_{forward}_{comment}_{like}.txt`` for the test set.

    Args:
        forward: fixed forward-count prediction (int).
        comment: fixed comment-count prediction (int).
        like: fixed like-count prediction (int).
        submission: when True, also generate the submission file.

    Raises:
        TypeError: if any of forward/comment/like is not an int.
    """
    # Bug fix: the original tested isinstance(forward, int) three times,
    # so bad `comment`/`like` arguments slipped through unvalidated.
    if not (isinstance(forward, int) and isinstance(comment, int)
            and isinstance(like, int)):
        raise TypeError("forward,comment,like should be type 'int' ")

    traindata, testdata = loadData()

    # Score the fixed prediction on the training set.
    train_real_pred = traindata[['forward', 'comment', 'like']]
    train_real_pred['fp'], train_real_pred['cp'], train_real_pred['lp'] = forward, comment, like
    print("Score on the training set:{0:.2f}%".format(precision(train_real_pred.values) * 100))

    # Predict on the test data with the fixed values; generate submission file.
    if submission:
        test_pred = testdata[['uid', 'mid']]
        test_pred['fp'], test_pred['cp'], test_pred['lp'] = forward, comment, like
        filename = "weibo_predict_{}_{}_{}.txt".format(forward, comment, like)
        result = ["{0}\t{1}\t{2},{3},{4}\n".format(row[0], row[1], row[2], row[3], row[4])
                  for _, row in test_pred.iterrows()]
        # Context manager guarantees the handle is closed even on error.
        with open(filename, 'w') as f:
            f.writelines(result)
        print('generate submission file "{}"'.format(filename))
def predict_with_fixed_value(forward, comment, like, submission=True):
    """Predict every post's forward/comment/like counts with one fixed value each.

    NOTE(review): this is a duplicate definition of predict_with_fixed_value
    (an earlier one appears above in this file); whichever is defined last
    wins at import time — consider removing one copy.

    Args:
        forward: fixed forward-count prediction (int).
        comment: fixed comment-count prediction (int).
        like: fixed like-count prediction (int).
        submission: when True, also generate the submission file.

    Raises:
        TypeError: if any of forward/comment/like is not an int.
    """
    # Bug fix: the original tested isinstance(forward, int) three times,
    # so bad `comment`/`like` arguments slipped through unvalidated.
    if not (isinstance(forward, int) and isinstance(comment, int)
            and isinstance(like, int)):
        raise TypeError("forward,comment,like should be type 'int' ")

    traindata, testdata = loadData()

    # Score the fixed prediction on the training set.
    train_real_pred = traindata[['forward', 'comment', 'like']]
    train_real_pred['fp'], train_real_pred['cp'], train_real_pred['lp'] = forward, comment, like
    # print() call form works on both Python 2 and 3 for a single argument.
    print("Score on the training set:{0:.2f}%".format(precision(train_real_pred.values) * 100))

    # Predict on the test data with the fixed values; generate submission file.
    if submission:
        test_pred = testdata[['uid', 'mid']]
        test_pred['fp'], test_pred['cp'], test_pred['lp'] = forward, comment, like
        filename = "weibo_predict_{}_{}_{}.txt".format(forward, comment, like)
        result = ["{0}\t{1}\t{2},{3},{4}\n".format(row[0], row[1], row[2], row[3], row[4])
                  for _, row in test_pred.iterrows()]
        # Context manager guarantees the handle is closed even on error.
        with open(filename, 'w') as f:
            f.writelines(result)
        print('generate submission file "{}"'.format(filename))
def predict_with_stat(stat="median", submission=True):
    """Predict per-post counts with a per-uid statistic of historical counts.

    Args:
        stat: which per-uid statistic to use — one of "min", "max",
            "mean", "median" (must match the keys produced by genUidStat,
            e.g. "forward_median").
        submission: when True, also write ``weibo_predict_{stat}.txt``.
    """
    stat_dic = genUidStat()
    # Load the data and set up the columns.
    traindata, testdata = loadData()

    def _lookup(uids):
        """Per-uid (forward, comment, like) predictions; 0 for unseen uids."""
        fwd, cmt, lk = [], [], []
        for uid in uids:
            if uid in stat_dic:
                fwd.append(int(stat_dic[uid]["forward_" + stat]))
                cmt.append(int(stat_dic[uid]["comment_" + stat]))
                lk.append(int(stat_dic[uid]["like_" + stat]))
            else:
                fwd.append(0)
                cmt.append(0)
                lk.append(0)
        return fwd, cmt, lk

    # Score on the training set.
    forward, comment, like = _lookup(traindata['uid'])
    train_real_pred = traindata[['forward', 'comment', 'like']]
    train_real_pred['fp'], train_real_pred['cp'], train_real_pred['lp'] = forward, comment, like
    print("Score on the training set:{0:.2f}%".format(precision(train_real_pred.values) * 100))

    # Predict on the test data with the same statistic; generate submission file.
    if submission:
        test_pred = testdata[['uid', 'mid']]
        forward, comment, like = _lookup(testdata['uid'])
        test_pred['fp'], test_pred['cp'], test_pred['lp'] = forward, comment, like
        filename = "weibo_predict_{}.txt".format(stat)
        result = ["{0}\t{1}\t{2},{3},{4}\n".format(row[0], row[1], row[2], row[3], row[4])
                  for _, row in test_pred.iterrows()]
        # Context manager guarantees the handle is closed even on error.
        with open(filename, 'w') as f:
            f.writelines(result)
        print('generate submission file "{}"'.format(filename))
def predict_with_stat(stat="median", submission=True):
    """Predict per-post counts with a per-uid statistic of historical counts.

    NOTE(review): this is a duplicate definition of predict_with_stat
    (an earlier one appears above in this file); whichever is defined last
    wins at import time — consider removing one copy.

    Args:
        stat: which per-uid statistic to use — one of "min", "max",
            "mean", "median" (must match the keys produced by genUidStat).
        submission: when True, also write ``weibo_predict_{stat}.txt``.
    """
    stat_dic = genUidStat()
    traindata, testdata = loadData()

    def _lookup(uids):
        """Per-uid (forward, comment, like) predictions; 0 for unseen uids."""
        fwd, cmt, lk = [], [], []
        for uid in uids:
            # `in` replaces dict.has_key(), which was removed in Python 3.
            if uid in stat_dic:
                fwd.append(int(stat_dic[uid]["forward_" + stat]))
                cmt.append(int(stat_dic[uid]["comment_" + stat]))
                lk.append(int(stat_dic[uid]["like_" + stat]))
            else:
                fwd.append(0)
                cmt.append(0)
                lk.append(0)
        return fwd, cmt, lk

    # Score on the training set.
    forward, comment, like = _lookup(traindata['uid'])
    train_real_pred = traindata[['forward', 'comment', 'like']]
    train_real_pred['fp'], train_real_pred['cp'], train_real_pred['lp'] = forward, comment, like
    print("Score on the training set:{0:.2f}%".format(precision(train_real_pred.values) * 100))

    # Predict on the test data with the same statistic; generate submission file.
    if submission:
        test_pred = testdata[['uid', 'mid']]
        forward, comment, like = _lookup(testdata['uid'])
        test_pred['fp'], test_pred['cp'], test_pred['lp'] = forward, comment, like
        filename = "weibo_predict_{}.txt".format(stat)
        result = ["{0}\t{1}\t{2},{3},{4}\n".format(row[0], row[1], row[2], row[3], row[4])
                  for _, row in test_pred.iterrows()]
        # Context manager guarantees the handle is closed even on error.
        with open(filename, 'w') as f:
            f.writelines(result)
        print('generate submission file "{}"'.format(filename))
def search_all_uid():
    """Search the best (forward, comment, like) prediction for every uid.

    Fans the per-uid searches out across a multiprocessing Pool (one
    ``search`` task per uid per target column), collects the results, and
    best-effort pickles the mapping to ``uid_best_pred.pkl``.

    Returns:
        dict mapping uid -> [best_forward, best_comment, best_like].
    """
    traindata, testdata = loadData()
    stat_dic = genUidStat()
    uid_best_pred = {}
    pool = Pool()
    uids, f, c, l = [], [], [], []
    for uid in stat_dic:
        print("search uid:{}".format(uid))
        uid_data = traindata[traindata.uid == uid]
        # Search bounds: the uid's historical min/median/max for each column.
        arguments = stat_dic[uid][['forward_min', 'forward_median', 'forward_max',
                                   'comment_min', 'comment_median', 'comment_max',
                                   'like_min', 'like_median', 'like_max']]
        arguments = tuple(int(i) for i in arguments)
        # Three async tasks per uid, one per target column.
        f.append(pool.apply_async(search, args=(uid_data, 'forward', arguments)))
        c.append(pool.apply_async(search, args=(uid_data, 'comment', arguments)))
        l.append(pool.apply_async(search, args=(uid_data, 'like', arguments)))
        uids.append(uid)
    pool.close()
    pool.join()

    # Results come back in submission order, so they line up with `uids`.
    f = [r.get() for r in f]
    c = [r.get() for r in c]
    l = [r.get() for r in l]
    for uid, fp, cp, lp in zip(uids, f, c, l):
        uid_best_pred[uid] = [fp, cp, lp]

    # Best-effort cache of the result; pickle must be written in binary mode.
    try:
        with open('uid_best_pred.pkl', 'wb') as fh:
            cPickle.dump(uid_best_pred, fh)
    except Exception as e:
        # Don't fail the whole search over a cache write, but do say why.
        print("warning: could not save uid_best_pred.pkl: {}".format(e))
    return uid_best_pred
def predict_by_search(submission=True):
    """Predict per-post counts using each uid's searched-best values.

    Runs search_all_uid() to get per-uid best (forward, comment, like)
    predictions, scores them on the training set, and when *submission* is
    True writes ``weibo_predict_search.txt`` for the test set.

    Args:
        submission: when True, also generate the submission file.
    """
    traindata, testdata = loadData()
    uid_best_pred = search_all_uid()
    print("search done,now predict on traindata and testdata...")

    def _lookup(uids):
        """Per-uid best predictions as three lists; 0 for unseen uids."""
        fwd, cmt, lk = [], [], []
        for uid in uids:
            # `in` replaces dict.has_key(), which was removed in Python 3.
            if uid in uid_best_pred:
                fwd.append(int(uid_best_pred[uid][0]))
                cmt.append(int(uid_best_pred[uid][1]))
                lk.append(int(uid_best_pred[uid][2]))
            else:
                fwd.append(0)
                cmt.append(0)
                lk.append(0)
        return fwd, cmt, lk

    # Score on the training set.
    forward, comment, like = _lookup(traindata['uid'])
    train_real_pred = traindata[['forward', 'comment', 'like']]
    train_real_pred['fp'], train_real_pred['cp'], train_real_pred['lp'] = forward, comment, like
    print("Score on the training set:{0:.2f}%".format(precision(train_real_pred.values) * 100))

    if submission:
        test_pred = testdata[['uid', 'mid']]
        forward, comment, like = _lookup(testdata['uid'])
        test_pred['fp'], test_pred['cp'], test_pred['lp'] = forward, comment, like
        # Generate the submission file.
        filename = "weibo_predict_search.txt"
        result = ["{0}\t{1}\t{2},{3},{4}\n".format(row[0], row[1], row[2], row[3], row[4])
                  for _, row in test_pred.iterrows()]
        # Context manager guarantees the handle is closed even on error.
        with open(filename, 'w') as f:
            f.writelines(result)
        print('generate submission file "{}"'.format(filename))