def test1(learning_method, exploration):
    print
    print '# testing learning_method=%d exploration=%d' % (learning_method, exploration)
    print
    n_types = 10
    n_labels = 4
    data = macarico.util.make_sequence_mod_data(100, 6, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]
    tRNN = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
    policy = LinearPolicy(tRNN, n_labels)
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.001)
    p_rollin_ref = stochastic(ExponentialAnnealing(0.9))
    p_rollout_ref = stochastic(ExponentialAnnealing(0.99999))
    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: BanditLOLS(
            HammingLossReference(),
            policy,
            p_rollin_ref,
            p_rollout_ref,
            learning_method,  # LEARN_IPS, LEARN_DR, LEARN_BIASED
            exploration,
        ),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step, p_rollout_ref.step],
        train_eval_skip=10,
    )
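# A hypothetical driver (sketch): sweep the update rules named in the comment
# above under uniform exploration. It assumes the LEARN_* and EXPLORE_*
# constants are exposed on BanditLOLS, as the other tests in this collection
# suggest.
if __name__ == '__main__':
    for method in [BanditLOLS.LEARN_BIASED, BanditLOLS.LEARN_IPS, BanditLOLS.LEARN_DR]:
        test1(method, BanditLOLS.EXPLORE_UNIFORM)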
def test1():
    n_types = 10
    n_labels = 4
    print
    print '# test sequence labeler on mod data with LOLS'
    data = macarico.util.make_sequence_mod_data(20, 6, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]
    tRNN = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
    policy = LinearPolicy(tRNN, n_labels)
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)
    p_rollin_ref = stochastic(ExponentialAnnealing(0.9))
    p_rollout_ref = stochastic(ExponentialAnnealing(0.9))
    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        learning_alg=lambda ex: LOLS.lols(ex, HammingLoss, HammingLossReference(),
                                          policy, p_rollin_ref, p_rollout_ref),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step, p_rollout_ref.step],
        train_eval_skip=1,
    )
def test1(LEARNER=LearnerOpts.DAGGER):
    print
    print 'Running test 1 with learner=%s' % LEARNER
    print '======================================================='
    n_states = 3
    n_actions = 2
    tRNN = TransitionRNN([mdp.MDPFeatures(n_states, noise_rate=0.5)],
                         [AttendAt(lambda _: 0, 's')],
                         n_actions)
    policy = LinearPolicy(tRNN, n_actions)
    p_rollin_ref = stochastic(ExponentialAnnealing(0.99))
    p_rollout_ref = stochastic(ExponentialAnnealing(1))
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)
    test_mdp, pi_ref = make_ross_mdp()
    if LEARNER == LearnerOpts.DAGGER:
        learner = lambda: DAgger(pi_ref, policy, p_rollin_ref)
    elif LEARNER == LearnerOpts.TWISTED:
        learner = lambda: TwistedDAgger(pi_ref, policy, p_rollin_ref)
    elif LEARNER == LearnerOpts.MAXLIK:
        learner = lambda: MaximumLikelihood(pi_ref, policy)
    elif LEARNER == LearnerOpts.AGGREVATE:
        learner = lambda: AggreVaTe(pi_ref, policy, p_rollin_ref)
    elif LEARNER == LearnerOpts.LOLS:
        learner = None
    losses = []
    for epoch in xrange(101):
        optimizer.zero_grad()
        if learner is not None:
            l = learner()
            env = test_mdp.mk_env()
            res = env.run_episode(l)
            loss = mdp.MDPLoss()(test_mdp, env)
            l.update(loss)
        elif LEARNER == LearnerOpts.LOLS:
            lols(test_mdp, mdp.MDPLoss, pi_ref, policy, p_rollin_ref, p_rollout_ref)
        optimizer.step()
        p_rollin_ref.step()
        p_rollout_ref.step()
        env = test_mdp.mk_env()
        res = env.run_episode(policy)
        loss = mdp.MDPLoss()(test_mdp, env)
        losses.append(loss)
        if epoch % 20 == 0:
            print epoch, sum(losses[-100:]) / len(losses[-100:]), '\t', res
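# Hypothetical driver (sketch): run the Ross-MDP test under each learner
# variant handled above; the LearnerOpts values are those referenced in the
# test itself.
if __name__ == '__main__':
    for learner_opt in [LearnerOpts.DAGGER, LearnerOpts.TWISTED, LearnerOpts.MAXLIK,
                        LearnerOpts.AGGREVATE, LearnerOpts.LOLS]:
        test1(LEARNER=learner_opt)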
def test2():  # aggrevate
    print
    print '# test sequence labeler on mod data with AggreVaTe'
    n_types = 10
    n_labels = 4
    data = macarico.util.make_sequence_mod_data(100, 5, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]
    tRNN = TransitionRNN(
        [RNNFeatures(n_types)],
        [AttendAt()],
        n_labels,
    )
    policy = LinearPolicy(tRNN, n_labels)
    p_rollin_ref = stochastic(ExponentialAnnealing(0.99))
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)
    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: AggreVaTe(HammingLossReference(), policy, p_rollin_ref),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step],
        n_epochs=4,
        train_eval_skip=1,
    )
def test0():
    print
    print '# test sequence labeler on mod data with DAgger'
    n_types = 10
    n_labels = 4
    data = [Example(x, y, n_labels)
            for x, y in macarico.util.make_sequence_mod_data(100, 5, n_types, n_labels)]
    tRNN = Actor([RNNFeatures(n_types, output_field='mytok_rnn')],
                 [AttendAt(field='mytok_rnn')],
                 n_labels)
    policy = LinearPolicy(tRNN, n_labels)
    p_rollin_ref = stochastic(ExponentialAnnealing(0.99))
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)
    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: DAgger(HammingLossReference(), policy, p_rollin_ref),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step],
        n_epochs=4,
        train_eval_skip=1,
    )
def test_wsj():
    print
    print '# test on wsj subset'
    from macarico.data import nlp_data
    tr, de, te, vocab, label_id = \
        nlp_data.read_wsj_pos('data/wsj.pos', n_tr=50, n_de=50, n_te=0)
    n_types = len(vocab)
    n_labels = len(label_id)
    print 'n_train: %s, n_dev: %s, n_test: %s' % (len(tr), len(de), len(te))
    print 'n_types: %s, n_labels: %s' % (n_types, n_labels)
    tRNN = TransitionRNN([RNNFeatures(n_types, rnn_type='RNN')], [AttendAt()], n_labels)
    policy = LinearPolicy(tRNN, n_labels)
    p_rollin_ref = stochastic(ExponentialAnnealing(0.9))
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)
    macarico.util.trainloop(
        training_data=tr,
        dev_data=de,
        policy=policy,
        Learner=lambda: DAgger(HammingLossReference(), policy, p_rollin_ref),
        # Learner=lambda: MaximumLikelihood(HammingLossReference(), policy),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step],
        n_epochs=10,
        # train_eval_skip=None,
    )
def __init__(self, policy, reference, p_rollin_ref=NoAnnealing(0)):
    macarico.Learner.__init__(self)
    assert isinstance(policy, CostSensitivePolicy)
    self.rollin_ref = stochastic(p_rollin_ref)
    self.policy = policy
    self.reference = reference
    self.objective = 0.0
def __init__(self,
             policy,
             reference,
             loss_fn,
             p_rollin_ref=NoAnnealing(0),
             p_rollout_ref=NoAnnealing(0.5),
             mixture=MIX_PER_ROLL):
    macarico.LearningAlg.__init__(self)
    self.policy = policy
    self.reference = reference
    self.loss_fn = loss_fn()
    self.rollin_ref = stochastic(p_rollin_ref)
    self.rollout_ref = stochastic(p_rollout_ref)
    self.mixture = mixture
    self.rollout = None
    self.true_costs = torch.zeros(self.policy.n_actions)
    self.warned_rollout_ref = False
def test1(learning_method, exploration):
    print
    print '# testing learning_method=%d exploration=%d' % (learning_method, exploration)
    print
    n_types = 10
    n_labels = 2
    data = macarico.util.make_sequence_mod_data(100, 1, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]
    bag_size = 5  # number of independently initialized policies in the bootstrap ensemble
    tRNN = [TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
            for i in range(bag_size)]
    policy = BootstrapPolicy(tRNN, n_labels)
    #policy = LinearPolicy(tRNN[0], n_labels)
    #print 'policy=', policy
    #print 'parameters=', list(policy.parameters())
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)
    p_rollin_ref = stochastic(ExponentialAnnealing(0.9))
    p_rollout_ref = stochastic(ExponentialAnnealing(0.99999))
    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: BanditLOLS(
            HammingLossReference(),
            policy,
            p_rollin_ref,
            p_rollout_ref,
            learning_method,
            exploration,
        ),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_batch=[p_rollin_ref.step, p_rollout_ref.step],
        train_eval_skip=1,
        n_epochs=2,
    )
def __init__(self,
             policy,
             reference=None,
             p_rollin_ref=NoAnnealing(0),
             p_rollout_ref=NoAnnealing(0.5),
             update_method=LEARN_MTR,
             exploration=EXPLORE_BOLTZMANN,
             p_explore=NoAnnealing(1.0),
             mixture=LOLS.MIX_PER_ROLL):
    macarico.Learner.__init__(self)
    if reference is None:
        # with no reference policy, fall back to uniformly random actions
        reference = lambda s: np.random.choice(list(s.actions))
    self.policy = policy
    self.reference = reference
    self.rollin_ref = stochastic(p_rollin_ref)
    self.rollout_ref = stochastic(p_rollout_ref)
    self.update_method = update_method
    self.exploration = exploration
    self.explore = stochastic(p_explore)
    self.mixture = mixture
    assert self.update_method in range(BanditLOLS._LEARN_MAX), \
        'unknown update_method, must be one of BanditLOLS.LEARN_*'
    assert self.exploration in range(BanditLOLS._EXPLORE_MAX), \
        'unknown exploration, must be one of BanditLOLS.EXPLORE_*'
    # bookkeeping for the single exploration (deviation) step
    self.dev_t = None
    self.dev_a = None
    self.dev_actions = None
    self.dev_imp_weight = None
    self.dev_costs = None
    self.rollout = None
    self.t = None
    self.disallow = torch.zeros(self.policy.n_actions)
    self.truth = torch.zeros(self.policy.n_actions)
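# Example (sketch, not part of the library): constructing this learner with
# explicit keyword arguments, matching the signature above. `policy` is
# assumed to be a LinearPolicy built as in the surrounding tests; LEARN_DR
# and EXPLORE_BOLTZMANN are among the constants the asserts above refer to.
def make_bandit_lols(policy):
    return BanditLOLS(policy,
                      update_method=BanditLOLS.LEARN_DR,        # doubly-robust updates
                      exploration=BanditLOLS.EXPLORE_BOLTZMANN)  # softmax exploration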
def test1(use_bootstrap):
    n_types = 10
    n_labels = 4
    print
    print '# test sequence labeler on mod data with Reslope and', \
        ('bootstrap' if use_bootstrap else 'boltzmann'), 'exploration'
    data = macarico.util.make_sequence_mod_data(3000, 6, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]
    if not use_bootstrap:
        tRNN = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
        policy = LinearPolicy(tRNN, n_labels)
    else:
        rnns = [TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels,
                              h_name='h%d' % i)
                for i in xrange(5)]
        policy = BootstrapPolicy(rnns, n_labels)
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)
    p_ref = stochastic(ExponentialAnnealing(0.9))
    macarico.util.trainloop(
        training_data=data[:2048],
        dev_data=data[2048:],
        policy=policy,
        Learner=lambda: Reslope(HammingLossReference(), policy, p_ref,
                                exploration=BanditLOLS.EXPLORE_BOOTSTRAP
                                            if use_bootstrap else
                                            BanditLOLS.EXPLORE_BOLTZMANN),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_ref.step],
        train_eval_skip=1,
        bandit_evaluation=True,
        n_epochs=1,
    )
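# Hypothetical driver (sketch): exercise both exploration strategies handled
# by the test above.
if __name__ == '__main__':
    test1(use_bootstrap=False)
    test1(use_bootstrap=True)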
def __init__(self,
             reference,
             policy,
             p_ref,
             learning_method=BanditLOLS.LEARN_DR,
             exploration=BanditLOLS.EXPLORE_BOLTZMANN,
             explore=1.0,
             mixture=BanditLOLS.MIX_PER_ROLL,
             temperature=1.):
    self.reference = reference
    self.policy = policy
    self.learning_method = learning_method
    self.exploration = exploration
    self.temperature = temperature
    assert self.learning_method in range(BanditLOLS._LEARN_MAX), \
        'unknown learning_method, must be one of BanditLOLS.LEARN_*'
    assert self.exploration in range(BanditLOLS._EXPLORE_MAX), \
        'unknown exploration, must be one of BanditLOLS.EXPLORE_*'
    if mixture == BanditLOLS.MIX_PER_ROLL:
        # sample once and reuse the same choice for the whole episode
        use_ref = p_ref()
        self.use_ref = lambda: use_ref
    else:
        self.use_ref = p_ref
    if isinstance(explore, float):
        explore = stochastic(NoAnnealing(explore))
    self.explore = explore
    self.t = None
    self.dev_t = []
    self.dev_a = []
    self.dev_actions = []
    self.dev_imp_weight = []
    self.dev_costs = []
    self.squared_loss = 0.
    self.pred_act_cost = []
    macarico.Learner.__init__(self)
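# Example (sketch): building a Reslope learner the way the test above does,
# with Boltzmann exploration and an annealed reference-rollin probability.
# `policy` is assumed to be constructed as in that test; note that p_ref.step
# would still need to be called per epoch, as trainloop's run_per_epoch does.
def make_reslope(policy):
    p_ref = stochastic(ExponentialAnnealing(0.9))
    return Reslope(HammingLossReference(), policy, p_ref,
                   exploration=BanditLOLS.EXPLORE_BOLTZMANN)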
def __init__(self, policy, reference, p_rollin_ref=NoAnnealing(0)):
    macarico.Learner.__init__(self)
    self.rollin_ref = stochastic(p_rollin_ref)
    self.policy = policy
    self.reference = reference
    self.objective = 0.0
def test1(task=0, LEARNER=LearnerOpts.DAGGER):
    print
    print 'Running test 1 (v%d) with learner=%s' % (task, LEARNER)
    print '======================================================='
    if task == 0:
        print 'Sequence reversal task, easy version'
        data = macarico.util.make_sequence_reversal_data(100, 5, 5)
        foci = [AttendAt(lambda s: s.N - s.n - 1)]
    elif task == 1:
        print 'Sequence reversal task, hard version'
        data = macarico.util.make_sequence_reversal_data(1000, 5, 5)
        foci = [AttendAt()]
    elif task == 2:
        print 'Sequence reversal task, multi-focus version'
        data = macarico.util.make_sequence_reversal_data(100, 5, 5)
        foci = [AttendAt(), AttendAt(lambda s: s.N - s.n - 1)]
    elif task == 3:
        print 'Memoryless task, add-one mod K'
        data = macarico.util.make_sequence_mod_data(50, 5, 10, 3)
        foci = [AttendAt()]
    elif task == 4:
        print 'Matti-style data'
        data = make_matti_data(1000, 20, 2, 0.05)
        foci = [AttendAt()]
    n_types = 1 + max({x for X, _ in data for x in X})
    n_labels = 1 + max({y for _, Y in data for y in Y})
    data = [Example(x, y, n_labels) for x, y in data]
    random.shuffle(data)
    m = len(data) // 2
    train = data[:m]
    dev = data[m:]
    print 'n_train: %s, n_dev: %s' % (len(train), len(dev))
    print 'n_types: %s, n_labels: %s' % (n_types, n_labels)
    print 'learner:', LEARNER
    print
    tRNN = Actor([RNNFeatures(n_types)], foci, n_labels)
    policy = LinearPolicy(tRNN, n_labels)
    baseline = EWMA(0.8)
    p_rollin_ref = stochastic(ExponentialAnnealing(0.5))
    p_rollout_ref = stochastic(ExponentialAnnealing(0.5))
    if LEARNER == LearnerOpts.AC:
        from macarico.lts.reinforce import AdvantageActorCritic, LinearValueFn
        baseline = LinearValueFn(policy.features)
        policy.vfa = baseline  # adds params to policy via nn.Module
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)
    if LEARNER == LearnerOpts.DAGGER:
        learner = lambda: DAgger(HammingLossReference(), policy, p_rollin_ref)
    elif LEARNER == LearnerOpts.TWISTED:
        learner = lambda: TwistedDAgger(HammingLossReference(), policy, p_rollin_ref)
    elif LEARNER == LearnerOpts.MAXLIK:
        learner = lambda: MaximumLikelihood(HammingLossReference(), policy)
    elif LEARNER == LearnerOpts.AC:
        learner = lambda: AdvantageActorCritic(policy, baseline)
    elif LEARNER == LearnerOpts.REINFORCE:
        learner = lambda: Reinforce(policy, baseline)
    elif LEARNER == LearnerOpts.BANDITLOLS:
        learner = lambda: BanditLOLS(HammingLossReference(), policy,
                                     p_rollin_ref, p_rollout_ref,
                                     BanditLOLS.LEARN_DR,
                                     BanditLOLS.EXPLORE_UNIFORM,
                                     baseline)
    macarico.util.trainloop(
        training_data=train,
        dev_data=dev,
        policy=policy,
        Learner=learner,
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step, p_rollout_ref.step],
        n_epochs=10,
        train_eval_skip=1,
    )
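# Hypothetical driver (sketch): run every task variant handled above with
# the default DAgger learner.
if __name__ == '__main__':
    for task in range(5):
        test1(task=task, LEARNER=LearnerOpts.DAGGER)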