Exemplo n.º 1
0
def test(theta):
    print '----------------------'
    print ur'正在测试'

    (raw_data,posiCount) = BinReader.readData(r'c:\data\homework\1218t20_3.bin')
    
    x,real_y,online,result = getXY(np.array(raw_data))

    x = mapFeature(x)
    values = sigmoid(np.dot(x ,theta))

    #第3行是真实的结果,第4行是预测的概率
    result[:,3] = values
    
    result = list(result)
    result.sort(key=lambda x:x[3],reverse=True)
    print ur'共获得结果%d' % sum(values > 0.5)
    
    result = result[:1300]
   
    right = sum([item[2] for item in result])
    
    precision,recall,f1 = print_analyse(right,1300,posiCount)
    
    print ur'测试完毕'
Exemplo n.º 2
0
def test(theta):
    print '----------------------'
    print ur'正在测试'

    (raw_data,
     posiCount) = BinReader.readData(r'c:\data\homework\1218t20_3.bin')

    x, real_y, online, result = getXY(np.array(raw_data))

    x = mapFeature(x)
    values = sigmoid(np.dot(x, theta))

    #第3行是真实的结果,第4行是预测的概率
    result[:, 3] = values

    result = list(result)
    result.sort(key=lambda x: x[3], reverse=True)
    print ur'共获得结果%d' % sum(values > 0.5)

    result = result[:1300]

    right = sum([item[2] for item in result])

    precision, recall, f1 = print_analyse(right, 1300, posiCount)

    print ur'测试完毕'
Exemplo n.º 3
0
from sklearn.ensemble import GradientBoostingClassifier
from BinReader import BinReader
import numpy as np
from sklearn.ensemble.gradient_boosting import GradientBoostingRegressor

# Train a gradient-boosted regressor on the training file, then score every
# record of the test file and keep the 7000 highest-scoring ones.
# readData returns three values; `items` is not used in the lines shown here.
(data,label,items) = BinReader.readData(ur'F:\AliRecommendHomeworkData\1212新版\train1217.expand.norm.bin') 

X_train = np.array(data)
# Only the first element of each label record is used as the target.
label = [item[0] for item in label]
y_train = np.array(label)
# 150 boosting stages, depth-3 trees, loss 'ls', fixed random_state so runs
# are repeatable; verbose=1 makes fit() report progress.
est = GradientBoostingRegressor(n_estimators=150, learning_rate=0.1,max_depth=3, random_state=0, loss='ls',verbose=1).fit(X_train, y_train)
print 'testing...'

# The test file is read record by record through a BinReader instance.
# NOTE(review): no matching close call is visible in this excerpt -- confirm
# the reader is released somewhere.
reader = BinReader(ur'F:\AliRecommendHomeworkData\1212新版\test18.expand.norm.bin')
reader.open()
# Preallocate one slot per record; each slot becomes (userid, itemid, score).
result = [0] * reader.LineCount
for i in xrange(reader.LineCount):
    # NOTE: this rebinds `label`, replacing the training-label list above.
    (x,userid,itemid,label) = reader.readline()
    # First feature is forced to 1 -- presumably a constant/bias term,
    # TODO confirm against how the training rows were built.
    x[0] = 1
    # The single row is wrapped in a list and the first prediction is taken.
    y = est.predict([x])[0]
    result[i] = (userid,itemid,y)
    # Progress report every 10000 records.
    if i % 10000 == 0:
        print '%d/%d' % (i,reader.LineCount)
    
# Rank by predicted score, highest first, and keep the top 7000.
result.sort(key=lambda x:x[2],reverse=True)
result = result[:7000]


print ur'正在输出...'
with open('result.csv','w') as f:
    for item in result:
# -*- coding: utf-8 -*-

from sklearn.ensemble import GradientBoostingClassifier
from BinReader import BinReader
import numpy as np
from sklearn.ensemble.gradient_boosting import GradientBoostingRegressor

# Train a gradient-boosted regressor on path_train, then score every record
# of path_test and sort the scored records from highest to lowest.
path_train = ur'data\temp_train.bin'
path_test = ur'data\temp_test.bin'

# readData returns three values; `items` is not used in the lines shown here.
(data, label, items) = BinReader.readData(path_train)

X_train = np.array(data)
# Only the first element of each label record is used as the target.
label = [item[0] for item in label]
y_train = np.array(label)
# 150 boosting stages, depth-3 trees, loss 'ls', fixed random_state so runs
# are repeatable; verbose=1 makes fit() report progress.
est = GradientBoostingRegressor(n_estimators=150, learning_rate=0.1, max_depth=3, random_state=0, loss='ls',
                                verbose=1).fit(X_train, y_train)
print 'testing...'

# The test file is read record by record through a BinReader instance.
# NOTE(review): no matching close call is visible in this excerpt -- confirm
# the reader is released somewhere.
reader = BinReader(path_test)
reader.open()
# Preallocate one slot per record; each slot becomes (userid, itemid, score).
result = [0] * reader.LineCount
for i in xrange(reader.LineCount):
    # NOTE: this rebinds `label`, replacing the training-label list above.
    (x, userid, itemid, label) = reader.readline()
    # First feature is forced to 1 -- presumably a constant/bias term,
    # TODO confirm against how the training rows were built.
    x[0] = 1
    # The single row is wrapped in a list and the first prediction is taken.
    y = est.predict([x])[0]
    result[i] = (userid, itemid, y)
    # Progress report every 10000 records.
    if i % 10000 == 0:
        print '%d/%d' % (i, reader.LineCount)

# Rank by predicted score, highest first.
result.sort(key=lambda x: x[2], reverse=True)
Exemplo n.º 5
0
from sklearn.ensemble import GradientBoostingClassifier
from BinReader import BinReader
import numpy as np
from sklearn.ensemble.gradient_boosting import GradientBoostingRegressor


# Train a gradient-boosted regressor on the training file, then score every
# record of the test file and keep the 400 highest-scoring ones.
# readData returns three values; `items` is not used in the lines shown here.
(data,label,items) = BinReader.readData(ur'C:\data\medium\norm\train1217.bin') 

X_train = np.array(data)
# Only the first element of each label record is used as the target.
label = [item[0] for item in label]
y_train = np.array(label)
# 300 boosting stages, depth-5 trees, loss 'ls', fixed random_state so runs
# are repeatable; verbose=1 makes fit() report progress.
est = GradientBoostingRegressor(n_estimators=300, learning_rate=0.1,max_depth=5, random_state=0, loss='ls',verbose=1).fit(X_train, y_train)
print 'testing...'


# No-op statement; it has no effect on the script.
pass

# The test file is read record by record through a BinReader instance.
# NOTE(review): no matching close call is visible in this excerpt -- confirm
# the reader is released somewhere.
reader = BinReader(ur'C:\data\test1218.bin')
reader.open()
# Preallocate one slot per record; each slot becomes (userid, itemid, score).
result = [0] * reader.LineCount
for i in xrange(reader.LineCount):
    # NOTE: this rebinds `label`, replacing the training-label list above.
    (x,userid,itemid,label) = reader.readline()
    # First feature is forced to 1 -- presumably a constant/bias term,
    # TODO confirm against how the training rows were built.
    x[0] = 1
    # The single row is wrapped in a list and the first prediction is taken.
    y = est.predict([x])[0]
    result[i] = (userid,itemid,y)
    # Progress report every 10000 records.
    if i % 10000 == 0:
        print '%d/%d' % (i,reader.LineCount)
    
# Rank by predicted score, highest first, and keep the top 400.
result.sort(key=lambda x:x[2],reverse=True)
result = result[:400]