Example #1
from parse_data import yield_data, train, test
import numpy as np
from sklearn import preprocessing

N = 145231  # number of rows in the training set

if __name__ == '__main__':
    # Materialize the generator into dense arrays (2006 features per row).
    X = np.empty((N, 2006))
    y = np.empty(N)
    for ind, row in enumerate(yield_data(train)):
        ID, target, features = row
        X[ind, :] = features
        y[ind] = target

    # Standardize each feature column to zero mean and unit variance.
    X_scaled = preprocessing.scale(X)
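
For reference, yield_data comes from the project's local parse_data module, which this page does not show. A minimal sketch of the dense interface this example assumes (a hypothetical CSV layout: an ID column, a target column, then 2006 numeric feature columns) could look like the following; Example #2 below calls the same function with hashing_trick=True to get hashed feature indices instead, a branch omitted here.

import csv

def yield_data(path):
    # Hypothetical reader: yields one (ID, target, features) tuple per row.
    with open(path) as f:
        for row in csv.reader(f):
            ID = row[0]
            target = float(row[1])
            features = [float(v) for v in row[2:]]  # 2006 feature values assumed
            yield ID, target, features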
Example #2
from datetime import datetime
from math import log
from parse_data import yield_data, train

def logloss(p, y):
    ''' Bounded logarithmic loss.

        OUTPUT:
            logarithmic loss of p given y
    '''
    p = max(min(p, 1. - 1e-15), 1e-15)  # clip p so log() stays finite
    return -log(p) if y == 1. else -log(1. - p)
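
# For example: logloss(0.9, 1.) = -log(0.9) ≈ 0.105, while a confidently
# wrong prediction costs logloss(0.9, 0.) = -log(0.1) ≈ 2.303.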


start = datetime.now()

# alpha, beta, L1, L2, D, interaction and epoch are hyperparameters set
# earlier in the full script (not shown in this excerpt).
learner = ftrl_proximal(alpha, beta, L1, L2, D, interaction)

for e in range(epoch):
    loss = 0.
    count = 0
    for t, y, x in yield_data(train, hashing_trick=True, d=D):  # data is a generator

        p = learner.predict(x)   # step 1: predict P(y = 1 | x)
        loss += logloss(p, y)    # step 2: track progressive validation loss
        learner.update(x, p, y)  # step 3: update the model with (x, p, y)
        count += 1
        if count % 1000 == 0:
            print('%s\tencountered: %d\tcurrent logloss: %f' % (
                datetime.now(), count, loss / count))
        # if count > 10000:  # uncomment to stop early for a quick local test
        #     break

count = 0
loss = 0
#import pickle
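
Both the learner construction and the loop above assume an ftrl_proximal class defined earlier in the full script, exposing predict(x) and update(x, p, y) over a list x of hashed feature indices. As orientation only, a minimal per-coordinate FTRL-proximal sketch in that shape (following McMahan et al., with the interaction option ignored for brevity) might look like:

from math import exp, sqrt

class ftrl_proximal(object):
    ''' Minimal FTRL-proximal sketch for binary (hashed) features. '''

    def __init__(self, alpha, beta, L1, L2, D, interaction=False):
        self.alpha, self.beta = alpha, beta  # adaptive learning-rate schedule
        self.L1, self.L2 = L1, L2            # L1/L2 regularization strengths
        self.z = [0.] * D                    # per-coordinate gradient accumulator
        self.n = [0.] * D                    # per-coordinate squared-gradient sum

    def _w(self, i):
        # Derive the weight for coordinate i lazily from z and n.
        sign = -1. if self.z[i] < 0 else 1.
        if sign * self.z[i] <= self.L1:
            return 0.  # L1 keeps this weight at exactly zero
        return (sign * self.L1 - self.z[i]) / (
            (self.beta + sqrt(self.n[i])) / self.alpha + self.L2)

    def predict(self, x):
        # x: list of active hashed feature indices, each with implicit value 1.
        wTx = sum(self._w(i) for i in x)
        return 1. / (1. + exp(-max(min(wTx, 35.), -35.)))  # bounded sigmoid

    def update(self, x, p, y):
        g = p - y  # gradient of logloss w.r.t. wTx for binary features
        for i in x:
            sigma = (sqrt(self.n[i] + g * g) - sqrt(self.n[i])) / self.alpha
            self.z[i] += g - sigma * self._w(i)
            self.n[i] += g * g

Deriving weights lazily from the accumulators z and n lets the L1 term hold most coordinates at exactly zero, and only the indices active in x are touched per example, which is what makes the method practical for a D-dimensional hashed feature space.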