示例#1
0
def run_para(oriTrain,oriTest,field,numField,postfix,isToCSV=True):
    """Optionally convert a train/test CSV pair to FFM format, then run FFM.

    The ``.ffm`` paths are built by dropping the last four characters of each
    CSV path (assumed to be the ``.csv`` extension) and appending
    ``_<postfix>.ffm``.

    Args:
        oriTrain: Path of the training CSV.
        oriTest: Path of the test CSV.
        field: Field list forwarded to ``csv_2_ffm.to_ffm``.
        numField: Forwarded to ``csv_2_ffm.to_ffm`` as ``numField``.
        postfix: Tag embedded in the ``.ffm`` file names and in the result name.
        isToCSV: When true, run ``csv_2_ffm.to_ffm`` first. When false the
            conversion is skipped (presumably the ``.ffm`` files already
            exist -- confirm with callers).
    """
    if isToCSV:
        csv_2_ffm.to_ffm(oriTrain, oriTest, field,
                         numField=numField, postfix=postfix)

    trainFile = oriTrain[:-4] + '_' + postfix + '.ffm'
    testFile = oriTest[:-4] + '_' + postfix + '.ffm'

    # Training options; presumably libffm's iterations (-t), latent factors
    # (-k) and regularisation (-l) -- confirm against ffm.runffm.
    t = str(150)
    k = str(8)
    l = str(0.00002)

    # Only t/k/l appear in the template; the former unused ``feat`` argument
    # was dropped so ``field`` no longer has to be sized when isToCSV is False.
    resultName = '../submit/ffm_' + postfix + '_t{t}_k{k}_l{l}'.format(
        t=t, k=k, l=l)
    assignCMDPara = ' -t {t} -k {k} -l {l} -s 4 -r 0.02 '.format(t=t, k=k, l=l)
    ffm.runffm(trainFile, testFile, resultName, assignCMDPara)
示例#2
0
 def onceRun(day):
     """Convert the pre-action data for ``day`` to FFM format and run FFM.

     The training CSV is selected by ``day``; the test CSV is always the
     ``..._19.csv`` file. ``field`` is read from the enclosing scope.

     Args:
         day: Value substituted into the training file name and the postfix.
     """
     oriTrain = '../data/temp/train_preAction_{day}.csv'.format(day=day)
     oriTest = '../data/temp/test_preAction_19.csv'
     postfix = 'm_p_u_amc_s{feat}_preAction_{day}'.format(
         feat=str(len(field)), day=day)
     csv_2_ffm.to_ffm(oriTrain, oriTest, field, numField=[], postfix=postfix)

     trainFile = oriTrain[:-4] + '_' + postfix + '.ffm'
     testFile = oriTest[:-4] + '_' + postfix + '.ffm'

     # Training options; presumably libffm's iterations (-t), latent factors
     # (-k) and regularisation (-l) -- confirm against ffm.runffm.
     t = str(150)
     k = str(8)
     l = str(0.00002)

     # The template only uses t/k/l. The former extra ``feat`` argument read
     # ``ori11Field`` (a different name from ``field`` used above) and could
     # raise NameError for a value that was never formatted, so it is gone.
     resultName = '../submit/ffm_' + postfix + '_t{t}_k{k}_l{l}'.format(
         t=t, k=k, l=l)
     assignCMDPara = ' -t {t} -k {k} -l {l} -s 4 -r 0.02 '.format(t=t, k=k, l=l)
     ffm.runffm(trainFile, testFile, resultName, assignCMDPara)
示例#3
0
def run_userIDappID_2():
    """Run FFM on the userID/appID pre-action CSVs with pre-action features.

    Starts from a copy of the module-level ``copyField``, appends
    ``preAction`` plus four pre-click/pre-conversion columns (the latter are
    also passed as ``numField``), converts both CSVs to FFM format and hands
    the resulting files to ``ffm.runffm``.
    """
    counters = ['preClickCount', 'preClickDist', 'preConvCount', 'preConvDist']
    features = copyField[:]
    features.extend(['preAction'] + counters)

    train_csv = '../data/temp/train_preAction_userID,appID_5_.csv'
    test_csv = '../data/temp/test_preAction_userID,appID_5_.csv'

    # The number of fields is baked into the tag shared by all output names.
    tag = 'm_p_u_amc_s{0}_preAction_appIDuserID_5'.format(len(features))
    csv_2_ffm.to_ffm(train_csv, test_csv, features,
                     numField=counters, postfix=tag)

    train_ffm = train_csv[:-4] + '_' + tag + '.ffm'
    test_ffm = test_csv[:-4] + '_' + tag + '.ffm'

    iterations, factors, reg = '150', '8', str(0.00002)
    result_name = '../submit/ffm_' + tag + '_t{0}_k{1}_l{2}'.format(
        iterations, factors, reg)
    options = ' -t {0} -k {1} -l {2} -s 4 -r 0.02 '.format(
        iterations, factors, reg)
    ffm.runffm(train_ffm, test_ffm, result_name, options)
示例#4
0
def oriInRun():
    """Run FFM on the sample ``recent20`` data using the ``ori11Field`` fields.

    Converts the sample train/test CSVs to FFM format, then calls
    ``ffm.runffm`` with the ``instanceID`` column of the test CSV.

    Returns:
        Whatever ``ffm.runffm`` returns.
    """
    train_csv = '../data/sample/recent20.csv'
    test_csv = '../data/sample/test_oldPositionIDadID.csv'

    # The field count is part of the tag shared by all derived file names.
    tag = 'oldPositionIDadID_recent20_s{0}'.format(len(ori11Field))
    csv_2_ffm.to_ffm(train_csv, test_csv, ori11Field, numField=[], postfix=tag)

    train_ffm = train_csv[:-4] + '_' + tag + '.ffm'
    test_ffm = test_csv[:-4] + '_' + tag + '.ffm'

    iterations, factors, reg = '150', '8', str(0.00002)
    result_name = '../data/sample/result/ffm_' + tag + '_t{0}_k{1}_l{2}'.format(
        iterations, factors, reg)
    options = ' -t {0} -k {1} -l {2} -s 4 -r 0.02 '.format(
        iterations, factors, reg)

    ids = pd.read_csv(test_csv)['instanceID'].tolist()
    return ffm.runffm(train_ffm, test_ffm, result_name, options,
                      instanceID=ids)
示例#5
0
def newInRun():
    """Run FFM on the sample ``mergeAll_recent20`` data with extra fields.

    Extends a copy of ``ori11Field`` with six additional fields, converts the
    sample train/test CSVs to FFM format using that extended list, then calls
    ``ffm.runffm`` with the ``instanceID`` column of the test CSV.

    Returns:
        Whatever ``ffm.runffm`` returns.
    """
    newInField = ori11Field[:]
    # A missing comma used to fuse "appPlatform" and "hourTime" into the single
    # bogus field "appPlatformhourTime" (implicit string concatenation).
    newInField.extend(["hometown", "residence", "sitesetID",
                       "appPlatform", "hourTime", "appSum"])
    oriTrain = '../data/sample/mergeAll_recent20.csv'
    oriTest = '../data/sample/test_newPositionIDadID.csv'
    postfix = 'newPositionIDadID_recent20_s{feat}'.format(
        feat=str(len(newInField)))
    # Convert with the extended list: the postfix advertises len(newInField),
    # so passing the unextended ori11Field silently dropped the new fields.
    csv_2_ffm.to_ffm(oriTrain, oriTest, newInField, numField=[],
                     postfix=postfix)

    trainFile = oriTrain[:-4] + '_' + postfix + '.ffm'
    testFile = oriTest[:-4] + '_' + postfix + '.ffm'
    t = str(150)
    k = str(8)
    l = str(0.00002)
    resultName = '../data/sample/result/ffm_' + postfix + '_t{t}_k{k}_l{l}'.format(
        t=t, k=k, l=l)
    assignCMDPara = ' -t {t} -k {k} -l {l} -s 4 -r 0.02 '.format(t=t, k=k, l=l)
    instanceID = pd.read_csv(oriTest)['instanceID'].tolist()
    return ffm.runffm(trainFile, testFile, resultName, assignCMDPara,
                      instanceID=instanceID)
示例#6
0
def mergeRun(field_21,
             numField_21,
             fixDay,
             addPostfix='',
             needMerge=True,
             toffm=True):
    """Run FFM on the pre-merged appID/userID pre-action files for one day.

    Args:
        field_21: Field list forwarded to ``csv_2_ffm.to_ffm``; its length is
            embedded in the postfix.
        numField_21: Forwarded to ``csv_2_ffm.to_ffm`` as ``numField``.
        fixDay: Value substituted into the merged input file names and the
            postfix.
        addPostfix: Extra tag appended to the postfix.
        needMerge: Currently unused -- the merge step below is commented out
            and the pre-merged files under ``../data/temp/old/`` are read.
        toffm: When true, convert the merged CSVs to FFM format before
            training; when false the conversion is skipped.
    """
    # Disabled merge step, kept for reference on how the inputs were built:
    # merge_train = merge_appID_userID('../data/temp/train_preAction_userID_{fixDay}.csv'.format(fixDay=fixDay),
    #                              '../data/temp/train_preAction_userIDappID_{fixDay}.csv'.format(fixDay=fixDay),
    #                              '../data/temp/merge_appID_userID_{fixDay}.csv'.format(fixDay=fixDay))
    # merge_test = merge_appID_userID('../data/temp/test_preAction_userID_{fixDay}.csv'.format(fixDay=fixDay),
    #                                   '../data/temp/test_preAction_userIDappID_{fixDay}.csv'.format(fixDay=fixDay),
    #                                   '../data/temp/merge_test_appID_userID_{fixDay}.csv'.format(fixDay=fixDay))

    merge_train = '../data/temp/old/merge_appID_userID_{fixDay}.csv'.format(
        fixDay=fixDay)
    merge_test = '../data/temp/old/merge_test_appID_userID_{fixDay}.csv'.format(
        fixDay=fixDay)

    postfix = 'mergeAppUser_s{feat}_preAction_{fixDay}_{addPostfix}'.format(
        feat=str(len(field_21)), fixDay=fixDay, addPostfix=addPostfix)

    # Honour the caller's ``toffm`` flag; a leftover ``toffm = False`` override
    # used to make this branch unreachable regardless of the argument.
    if toffm:
        csv_2_ffm.to_ffm(merge_train,
                         merge_test,
                         field_21,
                         numField=numField_21,
                         postfix=postfix)
    trainFile = merge_train[:-4] + '_' + postfix + '.ffm'
    testFile = merge_test[:-4] + '_' + postfix + '.ffm'
    t = str(120)
    k = str(8)
    l = str(0.00002)
    resultName = '../submit/ffm_' + postfix + '_t{t}_k{k}_l{l}'.format(
        t=t, k=k, l=l)

    assignCMDPara = ' -t {t} -k {k} -l {l} -s 4 -r 0.02 '.format(t=t, k=k, l=l)
    ffm.runffm(trainFile, testFile, resultName, assignCMDPara)
示例#7
0
def validFFM(trainFile='../data/sample/m_p_u_amc.csv',
             predictFile='',
             resultName='../submit/ffmv',
             paras='-l 0.00002 -k 8 -t 100 -r 0.02 -s 8',
             isStop=False,
             onlyValid=True,
             field=[],
             numField=[]):
    """Train an FFM model on a train/validation split and log the run.

    The input CSV is split with ``splitTrainTest``, both parts are converted
    to FFM format with postfix ``valid``, and ``ffm-train.exe`` is invoked
    through the shell. The command line and elapsed time are written to a
    timestamped file under ``../log/``.

    Args:
        trainFile: CSV passed to ``splitTrainTest``.
        predictFile: Unused; only referenced by the disabled submission step.
        resultName: Its last ``/``-separated component names the model and
            log files.
        paras: Option string placed after ``ffm-train.exe``.
        isStop: Append ``--auto-stop`` to the training options.
        onlyValid: Append ``-p <validation file>`` to the training options;
            when false, ``ffm-predict.exe`` is run on the validation file
            after training instead.
        field: Field list forwarded to ``csv_2_ffm.to_ffm``.
        numField: Forwarded to ``csv_2_ffm.to_ffm`` as ``numField``.

    Raises:
        Exception: If ``isStop`` is set while ``onlyValid`` is not.
    """
    stamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    run_tag = resultName.split('/')[-1]
    stop_tag = 'stop' if isStop else 'nostop'
    model_path = './valid/ffm-model-' + run_tag + '_' + stop_tag + '_' + stamp
    # Only referenced by the disabled submission step further down.
    resultOut = './valid/ffm-out-' + stamp
    started = datetime.now()

    if isStop:
        if not onlyValid:
            raise Exception('autostop must with valied')

    # ---- build the train/validation split ----
    train_csv, test_csv = splitTrainTest(trainFile)
    csv_2_ffm.to_ffm(train_csv, test_csv, field,
                     numField=numField, postfix='valid')
    train_ffm = train_csv[0:-4] + '_valid.ffm'
    test_ffm = test_csv[0:-4] + '_valid.ffm'

    # ---- train and validate ----
    if onlyValid:
        paras += ' -p ' + test_ffm
    if isStop:
        paras += ' --auto-stop'
    train_cmd = 'ffm-train.exe ' + paras + ' ' + train_ffm + ' ' + model_path
    print(train_cmd)
    subprocess.call(train_cmd, shell=True)
    # train_log = os.popen(train_cmd).readlines()  # run and capture output lines

    if not onlyValid:
        valid_out = './valid/tempout/validTempOut-' + stamp
        predict_cmd = 'ffm-predict.exe {0} {1} {2}'.format(
            test_ffm, model_path, valid_out)
        subprocess.call(predict_cmd, shell=True)
        # test_log = os.popen(predict_cmd).readlines()

    # ---- final submission prediction after training (disabled) ----
    # resultFile = resultName  +'_'+ ('stop' if isStop else 'nostop')   +'_'+ timestr  +'.csv'
    # predictCMD= 'ffm-predict.exe {predictData} {model} {out}'.format(predictData=predictFile,model=modelFile,out=resultOut)
    # subprocess.call(predictCMD,shell=True)
    # print 'writing submit result : ',resultFile
    # with open(resultFile, 'w') as fo:
    #     fo.write('instanceID,prob\n')
    #     for t, row in enumerate(open(resultOut), start=1):
    #         fo.write('{0},{1}'.format(t, row))

    # ---- record the run and its parameters ----
    # Placeholders: to be filled once console output can be both printed and
    # captured; until then both lists stay empty.
    train_log = []
    test_log = []

    log_path = '../log/ffm-log-' + run_tag + '_' + stamp
    train_log = [entry.decode('gbk').encode('utf-8') for entry in train_log]
    test_log = [entry.decode('gbk').encode('utf-8') for entry in test_log]
    print(train_log)
    print(test_log)
    elapsed_msg = '时间: {0}'.format(datetime.now() - started)
    with open(log_path, 'w') as log:
        log.write(train_cmd)
        log.writelines(train_log)
        log.writelines(test_log)
        log.write(elapsed_msg)
    print(elapsed_msg)
示例#8
0
        'connectionType_appID',  #44
        'adID_advertiserID',  #45
        'positionType_advertiserID',  #46
        'camgaignID_appID',  #47
        'positionType_appCategory',  #48
        'advertiserID_appID',  #49
        'positionType_appID',  #50
        'adID_appID',  #51
        'camgaignID_appCategory',  #52
        'appID_appCategory',  #53
        'adID_appCategory',  #54
        'advertiserID_appCategory',  #55
    ]


# Convert the "only11twoComb" train/test CSVs using the first `featNum`
# entries returned by combAllFeat(), then run FFM on the resulting files.
featNum = 22
oriTrain = '../data/m_p_u_amc_only11twoComb.csv'
oriTest = '../data/test_m_p_u_amc_only11twoComb.csv'

field = combAllFeat()[:featNum]
csv_2_ffm.to_ffm(oriTrain, oriTest, field, numField=[], postfix=str(featNum))

# The .ffm names are the CSV paths with ".csv" replaced by "_<featNum>.ffm".
trainFile = '{0}_{1}.ffm'.format(oriTrain[:-4], featNum)
testFile = '{0}_{1}.ffm'.format(oriTest[:-4], featNum)
t = str(250)
resultName = '../submit/ffmresult_ori_and_11twoComb_{0}_t{1}'.format(featNum, t)
assignCMDPara = './ffm-train -l 0.00002 -k 4 -t {0} -r 0.02 -s 8'.format(t)
ffm.runffm(trainFile, testFile, resultName, assignCMDPara)
#--------------------------