def mytrain():
    """Run the mini-batch SGD training loop for `epoch` passes over the data.

    Mutates the module-level first-layer parameters `bb0` and `ww0` in place
    (manual gradient step), logs per-epoch train/test error, and stops early
    once test AUC has failed to improve for several consecutive epochs.

    NOTE(review): relies on many module globals not declared here (epoch,
    n_batch, batch_size, lr, ww0, train_file, test_file, train, get_fi_h1_y,
    auc_rmse, ut, time) -- confirm they are initialised before calling.
    """
    global bb0
    global ww1
    print "Training model:"
    # Despite the name, min_err tracks the BEST (maximum) test AUC seen so far.
    min_err = 0
    min_err_epoch = 0
    times_reduce = 0  # early-stopping patience counter (capped at 3, stop below -2)
    for i in range(epoch):
        start_time = time.time()
        index = 1  # 1-based offset into the training file for the next batch
        for j in range(n_batch):
            fi,x,y = get_fi_h1_y(train_file,index,batch_size)
            index += batch_size
            # NOTE(review): only bb0/ww1 are declared global, so ww2/ww3 and
            # bb1/bb2/bb3 here are plain locals -- these returns do NOT update
            # the module-level copies. Verify `train` updates shared state.
            gx,ww1,ww2, ww3,bb1,bb2,bb3 = train(x,y)
            b_size = len(fi)
        
            for t in range(b_size):
                ft = fi[t]    # active (sparse) feature indices for sample t
                gxt = gx[t]   # gradient w.r.t. the layer-one activation
                xt=x[t]
                # Manual SGD step through the sigmoid derivative xt*(1-xt).
                bb0=bb0-lr*gxt*xt*(1-xt)
                for feat in ft:
                    # presumably active features all carry value 1, so the same
                    # delta is applied to each active weight -- TODO confirm.
                    ww0[feat]=ww0[feat]-lr *gxt*xt*(1-xt)
                

        train_time = time.time() - start_time
        mins = int(train_time / 60)
        secs = int(train_time % 60)
        print 'training: ' + str(mins) + 'm ' + str(secs) + 's'

        start_time = time.time()
        print_err(train_file,'\t\tTraining Err: \t' + str(i))# train error
        train_time = time.time() - start_time
        mins = int(train_time / 60)
        secs = int(train_time % 60)
        print 'training error: ' + str(mins) + 'm ' + str(secs) + 's'

        start_time = time.time()
        auc, rmse = auc_rmse(test_file)
        test_time = time.time() - start_time
        mins = int(test_time / 60)
        secs = int(test_time % 60)
        ut.log_p( 'Test Err:' + str(i) + '\t' + str(auc) + '\t' + str(rmse))
        print 'test error: ' + str(mins) + 'm ' + str(secs) + 's'

        #stop training when no improvement for a while 
        if auc>min_err:
            min_err=auc
            min_err_epoch=i
            if times_reduce<3:
                times_reduce+=1  # accumulate patience, capped at 3
        else:
            times_reduce-=1
        if times_reduce<-2:
            break  # too many non-improving epochs in a row -> stop early
    ut.log_p( 'Minimal test error is '+ str( min_err)+' , at EPOCH ' + str(min_err_epoch))
def print_err(file,msg=''):
    """Evaluate AUC and RMSE on *file* via auc_rmse and write them to the log,
    prefixed by *msg* and tab-separated."""
    err_auc, err_rmse = auc_rmse(file)
    line = '\t'.join([msg, str(err_auc), str(err_rmse)])
    ut.log_p(line)
示例#3
0
# Per-advertiser hyper-parameter overrides.
if advertiser == '2997':
    lr=0.05
if advertiser== '3386':                                       # advertiser-specific override
    x_dim=0
    
# Optional 'mod' run mode tweaks for advertiser 2997 (sys.argv[2]).
if sys.argv[2]=='mod' and advertiser=='2997':
    lr=0.1
    lambda1=0.00
    


    
    
# Log the effective configuration before training.
ut.log_p('X:'+str(x_dim) + ' | Hidden 1:'+str(hidden1)+ ' | Hidden 2:'+str(hidden2)+
        ' | L rate:'+str(lr)+ ' | activation1:'+ str(acti_type)+
        ' | lambda:'+str(lambda1)
        )
        

# Output layer weights (hidden2 -> scalar), zero-initialised; bias starts at 0.
ww3=numpy.zeros(hidden2)
# ww3=rng.uniform(-0.05,0.05,hidden2)
bb3=0.


# Layer sizes, input dimension first (consumed by the DA pretraining call below).
arr=[]
arr.append(x_dim)
arr.append(hidden1)
arr.append(hidden2)

# ww1,bb1,ww2,bb2=da.get_da_weights(train_file,arr,ncases=train_size,batch_size=100000)
# pickle.dump( (ww1,bb1,ww2,bb2), open( "2997_da_10.p", "wb" ))
# NOTE(review): the indented lines below are orphaned -- the `if advertiser==...`
# branch they belong to is missing from this fragment, so this region does not
# parse as-is. Restore the missing conditional before running.
    lambda1=0.0000001
    dropout=0.98
    lambda1=0
elif advertiser=='3386':
    train_size=ut.file_len(train_file)                    #training size
    test_size=ut.file_len(test_file)
    x_dim=556897
elif advertiser=='all':
    x_dim=937670
    dropout=0.95
    lambda1=0
    lr=0.001
    hidden0=500
    hidden1=400    
# Log the 4-layer (drop_mlp4da) configuration.
ut.log_p('drop_mlp4da.py|ad:'+advertiser+'|drop:'+str(dropout)+'|b_size:'+str(batch_size)+' | X:'+str(x_dim) + ' | Hidden 0:'+str(hidden0)+ ' | Hidden 1:'+str(hidden1)+ ' | Hidden 2:'+str(hidden2)+
        ' | L_r:'+str(lr)+ ' | activation1:'+ str(acti_type)+
        ' | lambda:'+str(lambda1)
        )
        
# initialise parameters
arr=[]
arr.append(x_dim)
arr.append(hidden0)
arr.append(hidden1)
arr.append(hidden2)

# Sigmoid-appropriate random init for the three lower layers.
ww0,bb0=ut.init_weight(x_dim,hidden0,'sigmoid')
ww1,bb1=ut.init_weight(hidden0,hidden1,'sigmoid')
ww2,bb2=ut.init_weight(hidden1,hidden2,'sigmoid')

# ww0,bb0,ww1,bb1,ww2,bb2=da.get_da_weights(train_file,arr,ncases=train_size,batch_size=100000)
# pickle.dump( (ww0,bb0,ww1,bb1,ww2,bb2), open( "2997_da_4l_10.p", "wb" ))
示例#5
0
        # NOTE(review): orphaned loop-body fragment -- the enclosing definition
        # and loop are missing from this view. It maps a feature id to its
        # field via the token name before the ':' separator.
        name = s[1 + k][0:s[1 + k].index(':')]
        field = name_field[name]
        feat_field[feat] = field

def feat_layer_one_index(feat, l):
    """Position of latent component *l* of feature *feat* in the flattened
    layer-one input vector; slot 0 is reserved for the bias term."""
    field_offset = feat_field[feat] * k
    return field_offset + l + 1

def feats_to_layer_one_array(feats):
    """Expand a sparse feature list into the dense layer-one input vector.

    Slot 0 carries the FM bias weight w_0; each feature's k-dimensional FM
    embedding is copied into the slice owned by that feature's field.
    """
    vec = numpy.zeros(xdim)
    vec[0] = w_0
    for f in feats:
        lo = feat_layer_one_index(f, 0)
        hi = feat_layer_one_index(f, k)
        vec[lo:hi] = feat_weights[f]
    return vec

# Log the run configuration for the 3-layer FM-initialised MLP.
ut.log_p('drop_mlp3fm.py|ad:'+advertiser+'|drop:'+str(dropout)+'|b_size:'+str(batch_size)+' | X:'+str(xdim) + ' | Hidden 1:'+str(hidden1)+ ' | Hidden 2:'+str(hidden2)+
        ' | L_r:'+str(lr)+ ' | activation1:'+ str(acti_type)+
        ' | lambda:'+str(lambda1)
        )
        
# initialise parameters
# Uniform init in +/- sqrt(6/(fan_in+fan_out)) for the xdim -> hidden1 layer.
w=rng.uniform(  low=-numpy.sqrt(6. / (xdim + hidden1)),
                high=numpy.sqrt(6. / (xdim + hidden1)),
                size=(xdim,hidden1))
if acti_type=='sigmoid':
    ww1=numpy.asarray((w))
elif acti_type=='tanh':
    # tanh variant scales the base range by 4 -- TODO confirm this matches the
    # intended Glorot-style scheme (the paper applies 4x to sigmoid, not tanh).
    ww1=numpy.asarray((w*4))
else:
    ww1=numpy.asarray(rng.uniform(-1,1,size=(xdim,hidden1)))

bb1=numpy.zeros(hidden1)
示例#6
0
def print_err(file,msg=''):
    """Log the batch AUC and RMSE for *file*, prefixed with *msg*."""
    metrics = get_err_bat(file)
    ut.log_p(msg + '\t' + str(metrics[0]) + '\t' + str(metrics[1]))
示例#7
0
# Advertiser id may be overridden by the first CLI argument.
if len(sys.argv) > 1:
    advertiser = sys.argv[1]
train_file = "../../make-ipinyou-data/" + advertiser + "/train.dl.txt"  # training file
test_file = "../../make-ipinyou-data/" + advertiser + "/test.dl.txt"  # test file
feats = ut.feats_len(train_file)  # feature size
train_size = 312437  # ut.file_len(train_file)					#training size
test_size = 156063  # ut.file_len(test_file)		  			#test size
n_batch = train_size / batch_size  # number of batches (Python 2 integer division)


ut.log_p(
    "Hidden one:"
    + str(hidden1)
    + "|L rate:"
    + str(lr)
    + "|activation1:"
    + str(acti_type)
    + "|feats:"
    + str(feats)
    + "|lambda1:"
    + str(lambda1)
)

# initialise parameters
# Uniform init in +/- sqrt(6/(fan_in+fan_out)) for the feats -> hidden1 layer.
w = rng.uniform(
    low=-numpy.sqrt(6.0 / (feats + hidden1)), high=numpy.sqrt(6.0 / (feats + hidden1)), size=(feats, hidden1)
)
if acti_type == "sigmoid":
    ww1 = numpy.asarray((w))
elif acti_type == "tanh":
    ww1 = numpy.asarray((w * 4))
# NOTE(review): fragment truncated -- the else branch body is missing here.
else:
示例#8
0
def print_err(file, msg=""):
    """Compute AUC/RMSE over *file* with get_err_bat and append them to the log."""
    scores = get_err_bat(file)
    parts = [msg, str(scores[0]), str(scores[1])]
    ut.log_p("\t".join(parts))
示例#9
0
def log_p(msg, m=""):
    """Forward *msg* to ut.log_p, tagged with this script's name plus the
    advertiser id. The *m* parameter is accepted for interface compatibility
    but is ignored."""
    tag = "drop_mlp4da" + str(advertiser)
    ut.log_p(msg, tag)
# feats = ut.feats_len(train_file)                                           #feature size
train_size = 312437  # ut.file_len(train_file)                    #training size
test_size = 156063  # ut.file_len(test_file)                      #test size
n_batch = train_size / batch_size  # number of batches (Python 2 integer division)


# Load the pre-trained FM model used to seed the network input.
o_fm = fm.DataFM(fm_model_file)


ut.log_p(
    "X:"
    + str(o_fm.xdim)
    + " | Hidden 1:"
    + str(hidden1)
    + " | Hidden 2:"
    + str(hidden2)
    + " | L rate:"
    + str(lr)
    + " | activation1:"
    + str(acti_type)
    + " | lambda:"
    + str(lambda1)
)

# initialise parameters
# Uniform init in +/- sqrt(6/(fan_in+fan_out)) for the xdim -> hidden1 layer.
w = rng.uniform(
    low=-numpy.sqrt(6.0 / (o_fm.xdim + hidden1)),
    high=numpy.sqrt(6.0 / (o_fm.xdim + hidden1)),
    size=(o_fm.xdim, hidden1),
)
# NOTE(review): fragment truncated below -- the elif/else branches that follow
# this sigmoid case are missing from this view.
if acti_type == "sigmoid":
    ww1 = numpy.asarray((w))
示例#11
0
def log_p(msg,m=""):
    """Write *msg* through ut.log_p under the 'drop_mlp4da<advertiser>' tag
    (the *m* parameter is unused; kept for signature compatibility)."""
    ut.log_p(msg, "drop_mlp4da%s" % advertiser)
示例#12
0
# NOTE(review): the first lines here are orphaned -- the `if advertiser==...`
# that the indented line and the elif branches belong to is missing from this
# fragment, so this region does not parse as-is.
    lambda1=0.0000001
elif advertiser=='3386':
    x_dim=556897
elif advertiser=='2261':
    lambda1=0 
    x_dim=333143
# Optional 'mod' run mode for advertiser 2997 (sys.argv[2]): swap in the
# modified training file and recompute sizes and batch count.
if sys.argv[2]=='mod' and advertiser=='2997':
    train_file=train_file+'.mod4.txt'
    train_size=ut.file_len(train_file)
    print train_file
    n_batch=train_size/batch_size
    lr=0.002
    lambda1=0.00

# Log the 4-layer (ucl_mlp4da) configuration.
ut.log_p('ucl_mlp4da.py|'+'bat size:'+str(batch_size)+' | X:'+str(x_dim) + ' | Hidden 0:'+str(hidden0)+ ' | Hidden 1:'+str(hidden1)+ ' | Hidden 2:'+str(hidden2)+
        ' | L rate:'+str(lr)+ ' | activation1:'+ str(acti_type)+
        ' | lambda:'+str(lambda1)
        )
        
# initialise parameters
arr=[]
arr.append(x_dim)
arr.append(hidden0)
arr.append(hidden1)
arr.append(hidden2)

# Sigmoid-appropriate random init for the three lower layers.
ww0,bb0=ut.init_weight(x_dim,hidden0,'sigmoid')
ww1,bb1=ut.init_weight(hidden0,hidden1,'sigmoid')
ww2,bb2=ut.init_weight(hidden1,hidden2,'sigmoid')

# ww0,bb0,ww1,bb1,ww2,bb2=da.get_da_weights(train_file,arr,ncases=train_size,batch_size=100000)
# pickle.dump( (ww0,bb0,ww1,bb1,ww2,bb2), open( "2997_da_4l_10.p", "wb" ))
train_file='../../make-ipinyou-data/' + advertiser + '/train.fm.txt'             #training file
test_file='../../make-ipinyou-data/' + advertiser + '/test.fm.txt'                   #test file
fm_model_file='../../make-ipinyou-data/' + advertiser + '/fm.model.txt'                   #fm model file
#feats = ut.feats_len(train_file)                                           #feature size
train_size=312437        #ut.file_len(train_file)                    #training size
test_size=156063         #ut.file_len(test_file)                      #test size
n_batch=train_size/batch_size                                        #number of batches (Python 2 integer division)


# Load the pre-trained FM model used to seed the network input.
o_fm=fm.DataFM(fm_model_file)



ut.log_p('F:' + str(len(o_fm.feat_weights)) + ' | X:'+str(o_fm.xdim) + ' | Hidden1:'+str(hidden1)+ ' | Hidden2:'+str(hidden2)+
        ' | L rate:'+str(lr)+ ' | activation:'+ str(acti_type)+
        ' | lambda:'+str(lambda1)
        )
        
# initialise parameters
# Uniform init in +/- sqrt(6/(fan_in+fan_out)) for the xdim -> hidden1 layer.
w=rng.uniform(  low=-numpy.sqrt(6. / (o_fm.xdim + hidden1)),
                high=numpy.sqrt(6. / (o_fm.xdim + hidden1)),
                size=(o_fm.xdim,hidden1))
if acti_type=='sigmoid':
    # NOTE(review): this sigmoid branch multiplies by 4, unlike the sibling
    # snippets in this file where only the tanh branch uses w*4 -- confirm
    # which scaling is intended.
    ww1=numpy.asarray((w*4))
elif acti_type=='tanh':
    ww1=numpy.asarray((w*4))
else:
    ww1=numpy.asarray(rng.uniform(-1,1,size=(o_fm.xdim,hidden1)))

bb1=numpy.zeros(hidden1)