# Training-side driver: load the training csv, build derived physics
# features, rescale event weights to the official test-set size, and
# report the positive/negative weight balance (used for scale_pos_weight).
# NOTE(review): the usage print + exit(-1) were fused unconditionally into
# the script by formatting loss; they almost certainly sat under an
# argument-count guard — restored here, confirm against the full script.
if len(sys.argv) < 3:
    print('Usage: <train.csv> <model.dat>')
    sys.exit(-1)

dpath_train = sys.argv[1]   # input training csv
dpath_model = sys.argv[2]   # where the trained model will be saved

eta = 0.01          # learning rate
nround = 3000       # number of boosting rounds
lc = 0.5
test_size = 550000  # number of events in the official test set

label, dtrain, weight, punit, pset = phy.load_train(dpath_train)

# list of features that we want; use all features without met for now
features = set(['E_inv', 'E_tri', 'm_tri', 'm_inv', 'pts',
                'p_x', 'p_y', 'p_z'])
dextra = phy.mkf_pset([p for p in pset], features)

# concatenate raw and derived features together
dtrain = np.concatenate([dtrain, dextra], axis=-1)
print('finish making features, shape=%s' % str(dtrain.shape))

# rescale weight so the total weight matches the test set size
weight = weight * float(test_size) / len(label)
sum_wpos = sum(weight[i] for i in range(len(label)) if label[i] == 1.0)
sum_wneg = sum(weight[i] for i in range(len(label)) if label[i] == 0.0)

# print weight statistics
print('weight statistics: wpos=%g, wneg=%g, ratio=%g' % (
    sum_wpos, sum_wneg, sum_wneg / sum_wpos))
# construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
# Prediction-side driver: load the test csv, build the same derived
# features as training, score events with the saved booster, and rank
# them by descending prediction score.
# NOTE(review): dpath_test and dpath_model are used below but were never
# assigned in this chunk — restored from the argv convention implied by
# `outfile = sys.argv[1]...` and the sibling variant of this script;
# confirm against the full file.
dpath_test = sys.argv[1]    # input test csv
dpath_model = sys.argv[2]   # trained model file
dpath_result = sys.argv[3]  # where results are written

lc = 0.5
test_size = 550000
threshold_ratio = 0.15  # fraction of top-ranked events tagged as signal

# output csv name derived from the input file name
outfile = sys.argv[1].rsplit('.', 1)[0] + ".csv"
print(outfile)

# path to where the data lies
idx, dtest, punit, pset = phy.load_test(dpath_test)

# list of features that we want; use all features without met for now
features = set(['E_inv', 'E_tri', 'm_tri', 'm_inv', 'pts',
                'p_x', 'p_y', 'p_z'])
dpset = phy.mkf_pset([p for p in pset], features)

# concatenate all features together
dtest = np.concatenate([dtest, dpset], axis=-1)
print('finish making features, shape=%s' % str(dtest.shape))

# score with the trained booster; -999.0 marks missing values
xgmat = xgb.DMatrix(dtest, missing=-999.0)
bst = xgb.Booster()
bst.load_model(dpath_model)
ypred = bst.predict(xgmat)

# rank events by descending score: rorder[event_id] = 1-based rank
res = [(int(idx[i]), ypred[i]) for i in range(len(ypred))]
rorder = {}
for k, v in sorted(res, key=lambda x: -x[1]):
    rorder[k] = len(rorder) + 1
# Training-script preamble (second variant in this file): load training
# data, derive features, rescale weights and report the class balance.
# NOTE(review): the bare exit(-1) at the start would unconditionally kill
# the script; it was almost certainly under an argument-count guard that
# was lost in formatting — restored here, confirm against the full script.
if len(sys.argv) < 3:
    sys.exit(-1)

dpath_train = sys.argv[1]   # input training csv
dpath_model = sys.argv[2]   # where the trained model will be saved

eta = 0.01          # learning rate
nround = 3000       # number of boosting rounds
lc = 0.5
test_size = 550000  # number of events in the official test set

label, dtrain, weight, punit, pset = phy.load_train(dpath_train)

# list of features that we want; use all features without met for now
features = set(['E_inv', 'E_tri', 'm_tri', 'm_inv', 'pts',
                'p_x', 'p_y', 'p_z'])
dextra = phy.mkf_pset([p for p in pset], features)

# concatenate raw and derived features together
dtrain = np.concatenate([dtrain, dextra], axis=-1)
print('finish making features, shape=%s' % str(dtrain.shape))

# rescale weight so the total weight matches the test set size
weight = weight * float(test_size) / len(label)
sum_wpos = sum(weight[i] for i in range(len(label)) if label[i] == 1.0)
sum_wneg = sum(weight[i] for i in range(len(label)) if label[i] == 0.0)

# print weight statistics
print('weight statistics: wpos=%g, wneg=%g, ratio=%g' % (
    sum_wpos, sum_wneg, sum_wneg / sum_wpos))
# Prediction-script preamble (second variant in this file): load the test
# csv, derive the training-time features, score with the saved booster,
# and rank events by descending prediction score.
# NOTE(review): dpath_test is used below but never assigned in this
# chunk — restored as sys.argv[1], the convention implied by
# `outfile = sys.argv[1]...`; confirm against the full file.
dpath_test = sys.argv[1]    # input test csv
dpath_model = sys.argv[2]   # trained model file
dpath_result = sys.argv[3]  # where results are written

lc = 0.5
test_size = 550000
threshold_ratio = 0.15  # fraction of top-ranked events tagged as signal

# output csv name derived from the input file name
outfile = sys.argv[1].rsplit('.', 1)[0] + ".csv"
print(outfile)

# path to where the data lies
idx, dtest, punit, pset = phy.load_test(dpath_test)

# list of features that we want; use all features without met for now
features = set(['E_inv', 'E_tri', 'm_tri', 'm_inv', 'pts',
                'p_x', 'p_y', 'p_z'])
dpset = phy.mkf_pset([p for p in pset], features)

# concatenate all features together
dtest = np.concatenate([dtest, dpset], axis=-1)
print('finish making features, shape=%s' % str(dtest.shape))

# score with the trained booster; -999.0 marks missing values
xgmat = xgb.DMatrix(dtest, missing=-999.0)
bst = xgb.Booster()
bst.load_model(dpath_model)
ypred = bst.predict(xgmat)

# rank events by descending score: rorder[event_id] = 1-based rank
res = [(int(idx[i]), ypred[i]) for i in range(len(ypred))]
rorder = {}
for k, v in sorted(res, key=lambda x: -x[1]):
    rorder[k] = len(rorder) + 1