def train_run(n_examples, n_features, decisions_in, rseed=1): # -- need y for margin computation X, y = toyproblem.digits_xy(0, n_examples) assert len(y) == n_examples config = dict( n_examples_train=n_examples, n_examples_test=0, n_folds=5, feat_spec=dict(seed=rseed, n_features=n_features, scale=2.2), decisions=decisions_in, # gets clobbered by ctrl attachment save_svms=False, # good to test False sometimes svm_l2_regularization=1e-3, svm_max_observations=1e5, ) ctrl = Ctrl(None) BI = toyproblem.BoostableDigits() result = BI.evaluate(config, ctrl) decisions = np.asarray(result['decisions']) assert decisions.shape == (5, n_examples) print decisions.shape print 'mean abs decisions', abs(decisions).mean(), print 'mean margins', 1 - np.minimum(decisions * y, 1).mean(), tr_acc, te_acc = mean_acc(result) print 'train_accuracy', tr_acc, print 'test_accuracy', te_acc, print '' return decisions, tr_acc, te_acc
def test_boosting_margin_goes_down(): n_examples = 1750 X, y = toyproblem.digits_xy(0, n_examples) n_rounds = 8 margins = [] decisions = None for round_ii in range(n_rounds): config = dict( n_examples_train=n_examples, n_examples_test=0, n_folds=5, feat_spec=dict(seed=round_ii, n_features=16, scale=2.2), decisions=decisions, save_svms=False, # good to test False sometimes svm_l2_regularization=1e-3, svm_max_observations=1e3, ) ctrl = Ctrl(None) BI = toyproblem.BoostableDigits() result = BI.evaluate(config, ctrl) decisions = np.asarray(result['decisions']) assert decisions.shape == (5, 1750) print 'mean abs decisions', abs(decisions).mean(), margins.append(1 - np.minimum(decisions * y, 1).mean()) for key in 'train_accuracy', 'test_accuracy': print key, np.mean([rr[key] for rr in result['splits']]), print '' print margins print list(reversed(margins)) print list(sorted(margins)) assert list(reversed(margins)) == list(sorted(margins))
def test_boosting_for_smoke(): n_examples = 1790 X, y = toyproblem.digits_xy(0, n_examples) assert len(y) == n_examples n_rounds = 16 n_features_per_round = 16 print 'Training jointly' _, joint_tr_acc, joint_te_acc = train_run( n_examples, n_rounds * n_features_per_round, None) print 'Training one round' _, one_tr_acc, one_te_acc = train_run( n_examples, n_features_per_round, None) # -- train in rounds print 'Training in rounds' decisions = None for round_ii in range(n_rounds): decisions, tr_acc, te_acc = train_run( n_examples, n_features_per_round, decisions, rseed=round_ii) # assert that round-training and joint training are both way better than # training just one assert joint_tr_acc > 95 assert joint_te_acc > 88 assert one_tr_acc < 72 assert one_te_acc < 72 assert tr_acc > 90 assert te_acc > 88