Exemplo n.º 1
0
def test():
    """Score every test record with the linear model and write the top pairs.

    Loads the weight vector through ``readTheta()``, reads every record of
    ``FILENAME`` through ``BinReader``, scores each record as the dot product
    of its first 37 elements with the weights, and writes the
    ``userid,itemid`` pairs of the (at most) 6500 highest-scoring records to
    ``result.csv``, best score first.

    Progress and summary messages are printed to stdout. Nothing is returned.
    """
    top_n = 6500  # maximum number of (userid, itemid) pairs written out

    theta = readTheta()

    reader = BinReader(FILENAME)
    reader.open()
    total = reader.LineCount
    result = []
    for i in xrange(total):
        # The label stored with each record is not needed for scoring.
        (x, userid, itemid, _label) = reader.readline()
        x[0] = 1  # presumably the bias/intercept slot -- TODO confirm
        # Only the first 37 elements take part in the score; theta is
        # presumably 37 long as well -- TODO confirm against readTheta().
        y = np.dot(x[:37], theta)
        result.append((userid, itemid, y))
        if i % 10000 == 0:
            print('%d/%d' % (i, total))

    # Report the number of records actually scored rather than a counter
    # that is never incremented.
    count = len(result)

    # Highest score first; list.sort is stable, so ties keep read order.
    result.sort(key=lambda row: row[2], reverse=True)
    result = result[:top_n]

    print(u'样本总数: %d' % count)
    print(u'正在输出...')
    with open('result.csv', 'w') as f:
        for item in result:
            f.write('%d,%d\n' % (item[0], item[1]))

    # Report what was really written: fewer than top_n rows are possible
    # when the input holds fewer records.
    print(u'测试结束,输出个数: %d' % len(result))
Exemplo n.º 2
0
 def __init__(self, filename):
     """Remember *filename* and create a ``BinReader`` for it.

     :param filename: value stored on the instance and passed unchanged to
         ``BinReader``; presumably a path to a binary data file -- TODO
         confirm against ``BinReader``.
     """
     self.filename = filename
     self.reader = BinReader(filename)
Exemplo n.º 3
0
                   action='store_true',
                   help="Don't print file info")

# Parse the command line with the `argsp` parser built earlier in the script.
args = argsp.parse_args()

# Only one of the two modes may be requested per run: report the conflict,
# show the usage text and exit with status 1.
if args.construct and args.destruct:
    print("--construct and --destruct are mutually exclusive!")
    argsp.print_help()
    sys.exit(1)

# When destructing, we need to make the directory if it doesn't exist
# (args.destruct is both the mode switch and the target directory path).
if args.destruct:
    if not os.path.exists(args.destruct):
        os.makedirs(args.destruct)
    # Load the binary at args.path into a fresh FCH_Root.
    fh = FCH_Root()
    with BinReader(args.path) as br:
        fh.fromBinary(br)
    if not args.quiet:
        fh.printInfo()
    # Hand the loaded structure to destruct() with the target directory.
    fh.destruct(args.destruct, overwrite=args.overwrite)
elif args.construct:
    # Build an FCH_Root from args.construct and serialize it to args.path.
    fh = FCH_Root()
    fh.construct(args.construct)
    with BinWriter(args.path, overwrite=args.overwrite) as wr:
        fh.toBinary(wr)
    # Sanity read it again!
    # NOTE(review): the same `fh` instance is reused for the read-back;
    # whether fromBinary() resets earlier state is not visible here -- confirm.
    with BinReader(args.path) as br:
        fh.fromBinary(br)
    if not args.quiet:
        fh.printInfo()
else:
import numpy as np
from sklearn.ensemble.gradient_boosting import GradientBoostingRegressor

# Input files. The raw-string prefix keeps the backslash before "temp" from
# being read as a tab escape.
path_train = ur'data\temp_train.bin'
path_test = ur'data\temp_test.bin'

# Load the whole training set in one call. `items` is not used in the part
# of the script visible here.
(data, label, items) = BinReader.readData(path_train)

X_train = np.array(data)
# Each entry of `label` is indexable; only its first element is used as the
# regression target.
label = [item[0] for item in label]
y_train = np.array(label)
# Fit a gradient-boosted regressor on the training data: 150 estimators of
# depth 3, learning rate 0.1, loss 'ls', fixed seed, with progress output.
est = GradientBoostingRegressor(n_estimators=150, learning_rate=0.1, max_depth=3, random_state=0, loss='ls',
                                verbose=1).fit(X_train, y_train)
print 'testing...'

# Score the test file one record at a time.
reader = BinReader(path_test)
reader.open()
# Placeholder slots, each overwritten below with (userid, itemid, score).
result = [0] * reader.LineCount
for i in xrange(reader.LineCount):
    # NOTE: this rebinds the module-level `label` list used above; y_train
    # has already been built, so training is unaffected.
    (x, userid, itemid, label) = reader.readline()
    # Overwrite the first feature with a constant 1 -- presumably a bias
    # term; TODO confirm the training rows receive the same treatment.
    x[0] = 1
    # predict() is given a one-row batch; [0] unwraps the single prediction.
    y = est.predict([x])[0]
    result[i] = (userid, itemid, y)
    # Progress report every 10000 records.
    if i % 10000 == 0:
        print '%d/%d' % (i, reader.LineCount)

# Keep the 7000 highest-scoring records, best score first.
result.sort(key=lambda x: x[2], reverse=True)
result = result[:7000]

print "input"
with open('result.csv', 'w') as f: