def run_para(oriTrain, oriTest, field, numField, postfix, isToCSV=True):
    """Run one FFM train/predict pass for a train/test CSV pair.

    Args:
        oriTrain: Path of the training CSV; its last four characters are
            dropped when deriving the ``.ffm`` file name.
        oriTest: Path of the test CSV, handled the same way as ``oriTrain``.
        field: Field list forwarded to ``csv_2_ffm.to_ffm``.
        numField: Passed to ``csv_2_ffm.to_ffm`` as ``numField``.
        postfix: Tag inserted into the ``.ffm`` file names and result name.
        isToCSV: When true, ``csv_2_ffm.to_ffm`` is called first; when false
            the conversion is skipped and the derived ``.ffm`` paths are
            handed to ``ffm.runffm`` as they are.
    """
    if isToCSV:
        csv_2_ffm.to_ffm(oriTrain, oriTest, field,
                         numField=numField, postfix=postfix)

    # Both derived files share the same "_<postfix>.ffm" tail.
    ffm_tail = '_' + postfix + '.ffm'
    trainFile = oriTrain[:-4] + ffm_tail
    testFile = oriTest[:-4] + ffm_tail

    # Option values are kept as strings because they are only ever
    # interpolated into the result name and the command-line fragment.
    hyper = {'t': str(150), 'k': str(8), 'l': str(0.00002)}

    # ``feat`` has no placeholder in the template below; it is still
    # evaluated and passed, exactly as before.
    name_tail = '_t{t}_k{k}_l{l}'.format(feat=str(len(field)), **hyper)
    resultName = '../submit/ffm_' + postfix + name_tail

    assignCMDPara = ' -t {t} -k {k} -l {l} -s 4 -r 0.02 '.format(**hyper)
    ffm.runffm(trainFile, testFile, resultName, assignCMDPara)
def onceRun(day):
    """Convert and run FFM for the ``preAction`` training file of one day.

    The training CSV is ``../data/temp/train_preAction_<day>.csv``; the test
    CSV is always ``../data/temp/test_preAction_19.csv``. The field list is
    read from the module-level name ``field``.

    Args:
        day: Value interpolated into the training file name and the postfix.
    """
    oriTrain = '../data/temp/train_preAction_{day}.csv'.format(day=day)
    oriTest = '../data/temp/test_preAction_19.csv'
    postfix = 'm_p_u_amc_s{feat}_preAction_{day}'.format(
        feat=str(len(field)), day=day)

    csv_2_ffm.to_ffm(oriTrain, oriTest, field, numField=[], postfix=postfix)

    # Drop the last four characters of each CSV path before appending the
    # "_<postfix>.ffm" tail.
    trainFile = oriTrain[0:-4] + '_' + postfix + '.ffm'
    testFile = oriTest[0:-4] + '_' + postfix + '.ffm'

    t = str(150)
    k = str(8)
    l = str(0.00002)

    # The template only has {t}, {k} and {l}. The previous unused
    # ``feat=str(len(ori11Field))`` argument was dropped so this function no
    # longer depends on a global it does not otherwise use.
    resultName = '../submit/ffm_' + postfix + '_t{t}_k{k}_l{l}'.format(
        t=t, k=k, l=l)
    assignCMDPara = ' -t {t} -k {k} -l {l} -s 4 -r 0.02 '.format(t=t, k=k, l=l)
    ffm.runffm(trainFile, testFile, resultName, assignCMDPara)
def run_userIDappID_2():
    """Convert and run FFM on the ``preAction_userID,appID_5_`` CSV pair.

    The field list is a copy of the module-level ``copyField`` extended with
    ``preAction`` and four count/distance columns; those four columns are
    also passed to ``csv_2_ffm.to_ffm`` as ``numField``.
    """
    numField = ['preClickCount', 'preClickDist', 'preConvCount', 'preConvDist']

    # Work on a copy so the module-level list is left untouched.
    ori11Field = copyField[:]
    ori11Field.extend(['preAction'] + numField)
    feat_count = str(len(ori11Field))

    oriTrain = '../data/temp/train_preAction_userID,appID_5_.csv'
    oriTest = '../data/temp/test_preAction_userID,appID_5_.csv'
    postfix = 'm_p_u_amc_s{feat}_preAction_appIDuserID_5'.format(
        feat=feat_count)

    csv_2_ffm.to_ffm(oriTrain, oriTest, ori11Field,
                     numField=numField, postfix=postfix)

    # Derived .ffm names: CSV path minus its last four characters, plus tail.
    ffm_tail = '_' + postfix + '.ffm'
    trainFile = oriTrain[:-4] + ffm_tail
    testFile = oriTest[:-4] + ffm_tail

    hyper = {'t': str(150), 'k': str(8), 'l': str(0.00002)}
    # ``feat`` is passed but has no placeholder in this template.
    resultName = '../submit/ffm_' + postfix + '_t{t}_k{k}_l{l}'.format(
        feat=feat_count, **hyper)
    assignCMDPara = ' -t {t} -k {k} -l {l} -s 4 -r 0.02 '.format(**hyper)

    ffm.runffm(trainFile, testFile, resultName, assignCMDPara)
def oriInRun():
    """Run FFM on the ``recent20`` sample against the old positionID/adID test set.

    Uses the module-level ``ori11Field`` as the field list and no numeric
    fields. The ``instanceID`` column of the test CSV is forwarded to
    ``ffm.runffm``.

    Returns:
        Whatever ``ffm.runffm`` returns.
    """
    oriTrain = '../data/sample/recent20.csv'
    oriTest = '../data/sample/test_oldPositionIDadID.csv'

    feat_count = str(len(ori11Field))
    postfix = 'oldPositionIDadID_recent20_s{feat}'.format(feat=feat_count)

    csv_2_ffm.to_ffm(oriTrain, oriTest, ori11Field, numField=[],
                     postfix=postfix)

    # Derived .ffm names: CSV path minus its last four characters, plus tail.
    ffm_tail = '_' + postfix + '.ffm'
    trainFile = oriTrain[:-4] + ffm_tail
    testFile = oriTest[:-4] + ffm_tail

    hyper = {'t': str(150), 'k': str(8), 'l': str(0.00002)}
    # ``feat`` is passed but has no placeholder in this template.
    resultName = '../data/sample/result/ffm_' + postfix + (
        '_t{t}_k{k}_l{l}'.format(feat=feat_count, **hyper))
    assignCMDPara = ' -t {t} -k {k} -l {l} -s 4 -r 0.02 '.format(**hyper)

    # Read the ids only after the conversion step, as the original did.
    test_frame = pd.read_csv(oriTest)
    instanceID = test_frame['instanceID'].tolist()

    return ffm.runffm(trainFile, testFile, resultName, assignCMDPara,
                      instanceID=instanceID)
def newInRun():
    """Run FFM on the merged ``recent20`` sample against the new positionID/adID test set.

    The field list is a copy of the module-level ``ori11Field`` extended with
    six additional columns. That extended list is used both for the postfix
    (field count) and for the CSV-to-FFM conversion. The ``instanceID``
    column of the test CSV is forwarded to ``ffm.runffm``.

    Returns:
        Whatever ``ffm.runffm`` returns.
    """
    newInField = ori11Field[:]
    # One name per line with trailing commas. The original was missing the
    # comma after "appPlatform", which silently merged it with "hourTime"
    # into a single bogus field name.
    newInField.extend([
        "hometown",
        "residence",
        "sitesetID",
        "appPlatform",
        "hourTime",
        "appSum",
    ])

    oriTrain = '../data/sample/mergeAll_recent20.csv'
    oriTest = '../data/sample/test_newPositionIDadID.csv'
    postfix = 'newPositionIDadID_recent20_s{feat}'.format(
        feat=str(len(newInField)))

    # Convert with the extended field list. Previously ``ori11Field`` was
    # passed here, so the extra columns never reached the .ffm files even
    # though the postfix claimed they did.
    csv_2_ffm.to_ffm(oriTrain, oriTest, newInField, numField=[],
                     postfix=postfix)

    # Derived .ffm names: CSV path minus its last four characters, plus tail.
    trainFile = oriTrain[0:-4] + '_' + postfix + '.ffm'
    testFile = oriTest[0:-4] + '_' + postfix + '.ffm'

    t = str(150)
    k = str(8)
    l = str(0.00002)
    resultName = '../data/sample/result/ffm_' + postfix + (
        '_t{t}_k{k}_l{l}'.format(t=t, k=k, l=l))
    assignCMDPara = ' -t {t} -k {k} -l {l} -s 4 -r 0.02 '.format(t=t, k=k, l=l)

    instanceID = pd.read_csv(oriTest)['instanceID'].tolist()
    return ffm.runffm(trainFile, testFile, resultName, assignCMDPara,
                      instanceID=instanceID)
def mergeRun(field_21, numField_21, fixDay, addPostfix='', needMerge=True,
             toffm=True):
    """Run FFM on the pre-merged appID/userID files for one ``fixDay``.

    Args:
        field_21: Field list forwarded to ``csv_2_ffm.to_ffm``; its length is
            embedded in the postfix.
        numField_21: Passed to ``csv_2_ffm.to_ffm`` as ``numField``.
        fixDay: Value interpolated into the merged file names and the postfix.
        addPostfix: Extra tag appended to the postfix.
        needMerge: Currently unused; the merge step below is commented out
            and the already-merged files under ``../data/temp/old/`` are used.
        toffm: When true, convert the merged CSVs to ``.ffm`` before training.
            When false, the derived ``.ffm`` paths are passed to
            ``ffm.runffm`` without conversion.
    """
    # Disabled merge step, kept for reference:
    # merge_train = merge_appID_userID('../data/temp/train_preAction_userID_{fixDay}.csv'.format(fixDay=fixDay),
    #                                  '../data/temp/train_preAction_userIDappID_{fixDay}.csv'.format(fixDay=fixDay),
    #                                  '../data/temp/merge_appID_userID_{fixDay}.csv'.format(fixDay=fixDay))
    # merge_test = merge_appID_userID('../data/temp/test_preAction_userID_{fixDay}.csv'.format(fixDay=fixDay),
    #                                 '../data/temp/test_preAction_userIDappID_{fixDay}.csv'.format(fixDay=fixDay),
    #                                 '../data/temp/merge_test_appID_userID_{fixDay}.csv'.format(fixDay=fixDay))
    merge_train = '../data/temp/old/merge_appID_userID_{fixDay}.csv'.format(
        fixDay=fixDay)
    merge_test = '../data/temp/old/merge_test_appID_userID_{fixDay}.csv'.format(
        fixDay=fixDay)
    postfix = 'mergeAppUser_s{feat}_preAction_{fixDay}_{addPostfix}'.format(
        feat=str(len(field_21)), fixDay=fixDay, addPostfix=addPostfix)

    # Honour the caller's ``toffm`` flag. A leftover ``toffm = False``
    # assignment used to sit here, which made the parameter meaningless and
    # this branch unreachable.
    if toffm:
        csv_2_ffm.to_ffm(merge_train, merge_test, field_21,
                         numField=numField_21, postfix=postfix)

    # Derived .ffm names: CSV path minus its last four characters, plus tail.
    trainFile = merge_train[0:-4] + '_' + postfix + '.ffm'
    testFile = merge_test[0:-4] + '_' + postfix + '.ffm'

    t = str(120)
    k = str(8)
    l = str(0.00002)
    resultName = '../submit/ffm_' + postfix + '_t{t}_k{k}_l{l}'.format(
        t=t, k=k, l=l)
    assignCMDPara = ' -t {t} -k {k} -l {l} -s 4 -r 0.02 '.format(t=t, k=k, l=l)
    ffm.runffm(trainFile, testFile, resultName, assignCMDPara)
def validFFM(trainFile='../data/sample/m_p_u_amc.csv',
             predictFile='',
             resultName='../submit/ffmv',
             paras='-l 0.00002 -k 8 -t 100 -r 0.02 -s 8',
             isStop=False,
             onlyValid=True,
             field=[],
             numField=[]):
    """Split a training CSV, train with ``ffm-train.exe`` and log the run.

    The CSV is split with ``splitTrainTest``, both parts are converted with
    ``csv_2_ffm.to_ffm`` (postfix ``valid``), and ``ffm-train.exe`` is run
    through the shell. The train command and the elapsed time are written to
    ``../log/ffm-log-<result base name>_<timestamp>``.

    Args:
        trainFile: CSV handed to ``splitTrainTest``.
        predictFile: Only referenced by the commented-out submission step.
        resultName: Its last ``/``-separated component is used in the model
            and log file names.
        paras: Option string placed after ``ffm-train.exe``.
        isStop: When true, ``--auto-stop`` is appended to the options.
        onlyValid: When true, the held-out file is passed to the trainer via
            ``-p``; when false, ``ffm-predict.exe`` is run on it afterwards.
        field: Field list forwarded to ``csv_2_ffm.to_ffm``.
        numField: Passed to ``csv_2_ffm.to_ffm`` as ``numField``.

    Raises:
        Exception: If ``isStop`` is set while ``onlyValid`` is false.
    """
    timestr = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    run_tag = resultName.split('/')[-1]
    stop_tag = 'stop' if isStop else 'nostop'
    modelFile = './valid/ffm-model-' + run_tag + '_' + stop_tag + '_' + timestr
    # Only used by the commented-out submission step further down.
    resultOut = './valid/ffm-out-' + timestr
    start = datetime.now()

    if isStop and not onlyValid:
        raise Exception('autostop must with valied')

    # --- Generate the train/validation split ---
    trainAddr, testAddr = splitTrainTest(trainFile)
    csv_2_ffm.to_ffm(trainAddr, testAddr, field,
                     numField=numField, postfix='valid')
    trainFileName = trainAddr[:-4] + '_valid.ffm'
    testFileName = testAddr[:-4] + '_valid.ffm'

    # --- Train and validate ---
    if onlyValid:
        paras = paras + ' -p ' + testFileName
    if isStop:
        paras = paras + ' --auto-stop'
    trainCMD = 'ffm-train.exe ' + paras + ' {trainData} {model}'.format(
        trainData=trainFileName, model=modelFile)
    print(trainCMD)
    subprocess.call(trainCMD, shell=True)
    # trainInfo = os.popen(trainCMD).readlines()  # run the command and read its console output into a list

    if not onlyValid:
        validOut = './valid/tempout/validTempOut-' + timestr
        testCMD = 'ffm-predict.exe {testData} {model} {out}'.format(
            testData=testFileName, model=modelFile, out=validOut)
        subprocess.call(testCMD, shell=True)
        # testInfo = os.popen(testCMD).readlines()

    # --- After training, run the final submission prediction (disabled) ---
    # resultFile = resultName +'_'+ ('stop' if isStop else 'nostop') +'_'+ timestr +'.csv'
    # predictCMD= 'ffm-predict.exe {predictData} {model} {out}'.format(predictData=predictFile,model=modelFile,out=resultOut)
    # subprocess.call(predictCMD,shell=True)
    # print 'writing submit result : ',resultFile
    # with open(resultFile, 'w') as fo:
    #     fo.write('instanceID,prob\n')
    #     for t, row in enumerate(open(resultOut), start=1):
    #         fo.write('{0},{1}'.format(t, row))

    # --- Record the run's results and parameters ---
    # Placeholders: to be filled once there is a way to both print and
    # capture the console output of the commands above.
    trainInfo = []
    testInfo = []
    logFile = '../log/ffm-log-' + run_tag + '_' + timestr

    # Re-encode captured console lines from GBK to UTF-8 (both lists are
    # empty for now, so this is currently a no-op).
    trainInfo = [line.decode('gbk').encode('utf-8') for line in trainInfo]
    testInfo = [line.decode('gbk').encode('utf-8') for line in testInfo]
    print(trainInfo)
    print(testInfo)

    timeCostInfo = '时间: {0}'.format(datetime.now() - start)
    with open(logFile, 'w') as fo2:
        fo2.write(trainCMD)
        fo2.writelines(trainInfo)
        fo2.writelines(testInfo)
        fo2.write(timeCostInfo)
    print(timeCostInfo)
'connectionType_appID', #44 'adID_advertiserID', #45 'positionType_advertiserID', #46 'camgaignID_appID', #47 'positionType_appCategory', #48 'advertiserID_appID', #49 'positionType_appID', #50 'adID_appID', #51 'camgaignID_appCategory', #52 'appID_appCategory', #53 'adID_appCategory', #54 'advertiserID_appCategory', #55 ] oriTrain = '../data/m_p_u_amc_only11twoComb.csv' oriTest = '../data/test_m_p_u_amc_only11twoComb.csv' featNum = 22 field = combAllFeat()[0:featNum] csv_2_ffm.to_ffm(oriTrain, oriTest, field, numField=[], postfix=str(featNum)) trainFile = '../data/m_p_u_amc_only11twoComb' + '_' + str(featNum) + '.ffm' testFile = '../data/test_m_p_u_amc_only11twoComb' + '_' + str(featNum) + '.ffm' t = str(250) resultName = '../submit/ffmresult_ori_and_11twoComb' + '_' + str( featNum) + '_t' + t assignCMDPara = './ffm-train -l 0.00002 -k 4 -t ' + t + ' -r 0.02 -s 8' ffm.runffm(trainFile, testFile, resultName, assignCMDPara) #--------------------------