# Example 1
    print 'Usage: <train.csv> <model.dat>'
    exit(-1)

# --- Training-data preparation ---
# Command-line layout: <script> <train.csv> <model.dat>
dpath_train = sys.argv[1]   # path to the training CSV
dpath_model = sys.argv[2]   # path where the trained model will be written

# Boosting hyper-parameters / dataset constants.
eta = 0.01          # learning rate
nround = 3000       # number of boosting rounds
lc = 0.5            # NOTE(review): unused in this chunk -- presumably consumed below
test_size = 550000  # official test-set size, used to rescale weights

# Load raw training data; phy is a project-local helper module.
label, dtrain, weight, punit, pset = phy.load_train(dpath_train)
# list of derived physics features that we want
features = set(['E_inv', 'E_tri', 'm_tri', 'm_inv', 'pts', 'p_x', 'p_y', 'p_z'])
# use all features without met for now (plain list() copy instead of a
# manual identity comprehension)
dextra = phy.mkf_pset(list(pset), features)

# concatenate raw columns and derived features along the last axis
dtrain = np.concatenate([dtrain, dextra], axis=-1)

# print() call is valid on both Python 2 and 3
# (the original used a Python-2-only print statement)
print('finish making features, shape=%s' % str(dtrain.shape))

# rescale weight so the total matches the official test set size
weight = weight * float(test_size) / len(label)

# total weight of positive (signal) and negative (background) examples
sum_wpos = sum(weight[i] for i in range(len(label)) if label[i] == 1.0)
sum_wneg = sum(weight[i] for i in range(len(label)) if label[i] == 0.0)
# print weight statistics
print('weight statistics: wpos=%g, wneg=%g, ratio=%g' % (sum_wpos, sum_wneg, sum_wneg / sum_wpos))

# construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
# Example 2
# --- Prediction-script configuration ---
dpath_result = sys.argv[3]  # path where ranking results will be written

lc = 0.5                # NOTE(review): unused here -- presumably consumed below
test_size = 550000      # official test-set size
threshold_ratio = 0.15  # fraction of top-ranked events classified as signal
# derive the output CSV name from the input path (strip the last extension)
outfile = sys.argv[1].rsplit('.', 1)[0] + ".csv"
# print() call is valid on both Python 2 and 3
# (the original used a Python-2-only print statement)
print(outfile)
# path to where the data lies

# Load the test data; phy is a project-local helper module.
# NOTE(review): dpath_test is expected to be defined earlier -- not visible
# in this chunk; confirm against the full file.
idx, dtest, punit, pset = phy.load_test(dpath_test)
# list of derived physics features that we want
features = set(
    ['E_inv', 'E_tri', 'm_tri', 'm_inv', 'pts', 'p_x', 'p_y', 'p_z'])

# use all features without met for now (plain list() copy instead of a
# manual identity comprehension)
dpset = phy.mkf_pset(list(pset), features)

# concatenate raw columns and derived features along the last axis
dtest = np.concatenate([dtest, dpset], axis=-1)
# print() call is valid on both Python 2 and 3
# (the original used a Python-2-only print statement)
print('finish making features, shape=%s' % str(dtest.shape))

# construct xgboost.DMatrix, treating -999.0 as a missing value,
# then score with the previously trained model
xgmat = xgb.DMatrix(dtest, missing=-999.0)
bst = xgb.Booster()
bst.load_model(dpath_model)
ypred = bst.predict(xgmat)

# Pair each event id with its predicted score.
# (assumes idx and ypred have equal length -- same as the original indexing)
res = [(int(event_id), score) for event_id, score in zip(idx, ypred)]
# Rank events by descending score: rank 1 = highest prediction.
rorder = {}
for rank, (event_id, _score) in enumerate(sorted(res, key=lambda pair: -pair[1]), start=1):
    rorder[event_id] = rank
# Example 3
    exit(-1)

# --- Training-data preparation ---
# Command-line layout: <script> <train.csv> <model.dat>
dpath_train = sys.argv[1]   # path to the training CSV
dpath_model = sys.argv[2]   # path where the trained model will be written

# Boosting hyper-parameters / dataset constants.
eta = 0.01          # learning rate
nround = 3000       # number of boosting rounds
lc = 0.5            # NOTE(review): unused in this chunk -- presumably consumed below
test_size = 550000  # official test-set size, used to rescale weights

# Load raw training data; phy is a project-local helper module.
label, dtrain, weight, punit, pset = phy.load_train(dpath_train)
# list of derived physics features that we want
features = set(
    ['E_inv', 'E_tri', 'm_tri', 'm_inv', 'pts', 'p_x', 'p_y', 'p_z'])
# use all features without met for now (plain list() copy instead of a
# manual identity comprehension)
dextra = phy.mkf_pset(list(pset), features)

# concatenate raw columns and derived features along the last axis
dtrain = np.concatenate([dtrain, dextra], axis=-1)

# print() call makes this consistent with the print() call below and
# valid on both Python 2 and 3 (was a Python-2-only print statement)
print('finish making features, shape=%s' % str(dtrain.shape))

# rescale weight so the total matches the official test set size
weight = weight * float(test_size) / len(label)

# total weight of positive (signal) and negative (background) examples
sum_wpos = sum(weight[i] for i in range(len(label)) if label[i] == 1.0)
sum_wneg = sum(weight[i] for i in range(len(label)) if label[i] == 0.0)
# print weight statistics
print('weight statistics: wpos=%g, wneg=%g, ratio=%g' %
      (sum_wpos, sum_wneg, sum_wneg / sum_wpos))
# Example 4
# --- Prediction-script configuration ---
dpath_model = sys.argv[2]   # path to the trained model file
dpath_result = sys.argv[3]  # path where ranking results will be written

lc = 0.5                # NOTE(review): unused here -- presumably consumed below
test_size = 550000      # official test-set size
threshold_ratio = 0.15  # fraction of top-ranked events classified as signal
# derive the output CSV name from the input path (strip the last extension)
outfile = sys.argv[1].rsplit('.', 1)[0] + ".csv"
# print() call is valid on both Python 2 and 3
# (the original used a Python-2-only print statement)
print(outfile)
# path to where the data lies

# Load the test data; phy is a project-local helper module.
# NOTE(review): dpath_test is expected to be defined earlier -- not visible
# in this chunk; confirm against the full file.
idx, dtest, punit, pset = phy.load_test(dpath_test)
# list of derived physics features that we want
features = set(['E_inv', 'E_tri', 'm_tri', 'm_inv', 'pts', 'p_x', 'p_y', 'p_z'])

# use all features without met for now (plain list() copy instead of a
# manual identity comprehension)
dpset = phy.mkf_pset(list(pset), features)

# concatenate raw columns and derived features along the last axis
dtest = np.concatenate([dtest, dpset], axis=-1)
# print() call is valid on both Python 2 and 3
# (the original used a Python-2-only print statement)
print('finish making features, shape=%s' % str(dtest.shape))

# construct xgboost.DMatrix, treating -999.0 as a missing value,
# then score with the previously trained model
xgmat = xgb.DMatrix(dtest, missing=-999.0)
bst = xgb.Booster()
bst.load_model(dpath_model)
ypred = bst.predict(xgmat)

# Pair each event id with its predicted score.
res = [(int(idx[i]), ypred[i]) for i in range(len(ypred))]
# Rank events by descending score: rank 1 = highest prediction.
# (spacing normalized to PEP 8, matching the other copy of this ranking loop)
rorder = {}
for k, v in sorted(res, key=lambda x: -x[1]):
    rorder[k] = len(rorder) + 1