Example #1
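# Trains a sequence labeler on synthetic mod data with BanditLOLS, sweeping the
# update rule (LEARN_IPS / LEARN_DR / LEARN_BIASED) and the exploration strategy.
# Assumes the usual macarico pieces are imported (Example, TransitionRNN,
# RNNFeatures, AttendAt, LinearPolicy, BanditLOLS, HammingLoss,
# HammingLossReference, ExponentialAnnealing, stochastic) along with torch.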
def test1(learning_method, exploration):
    print()
    print('# testing learning_method=%d exploration=%d'
          % (learning_method, exploration))
    print()
    n_types = 10
    n_labels = 4
    data = macarico.util.make_sequence_mod_data(100, 6, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]

    tRNN = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
    policy = LinearPolicy(tRNN, n_labels)
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.001)

    p_rollin_ref = stochastic(ExponentialAnnealing(0.9))
    p_rollout_ref = stochastic(ExponentialAnnealing(0.99999))

    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: BanditLOLS(
            HammingLossReference(),
            policy,
            p_rollin_ref,
            p_rollout_ref,
            learning_method,  # LEARN_IPS, LEARN_DR, LEARN_BIASED
            exploration,
        ),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step, p_rollout_ref.step],
        train_eval_skip=10,
    )
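# A hedged usage sketch, not part of the original test file: assuming test1
# above is importable together with BanditLOLS, this sweeps the update rules
# named in the learning_method comment against two exploration modes that
# appear in the later examples.
if __name__ == '__main__':
    for learning_method in [BanditLOLS.LEARN_IPS, BanditLOLS.LEARN_DR,
                            BanditLOLS.LEARN_BIASED]:
        for exploration in [BanditLOLS.EXPLORE_UNIFORM,
                            BanditLOLS.EXPLORE_BOLTZMANN]:
            test1(learning_method, exploration)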
Example #2
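# Trains a sequence labeler on mod data with full-information LOLS; here the
# algorithm is passed to trainloop as learning_alg (a per-example function)
# rather than as a Learner class.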
def test1():
    n_types = 10
    n_labels = 4
    print()
    print('# test sequence labeler on mod data with LOLS')
    data = macarico.util.make_sequence_mod_data(20, 6, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]

    tRNN = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
    policy = LinearPolicy(tRNN, n_labels)
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)

    p_rollin_ref = stochastic(ExponentialAnnealing(0.9))
    p_rollout_ref = stochastic(ExponentialAnnealing(0.9))

    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        learning_alg=lambda ex: LOLS.lols(ex, HammingLoss,
                                          HammingLossReference(), policy,
                                          p_rollin_ref, p_rollout_ref),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step, p_rollout_ref.step],
        train_eval_skip=1,
    )
Example #3
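# Runs DAgger, TwistedDAgger, MaximumLikelihood, AggreVaTe, or LOLS on the
# synthetic "Ross MDP" from make_ross_mdp(); LOLS bypasses the Learner wrapper
# and is called directly inside the loop. Prints a running-average loss every
# 20 epochs.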
def test1(LEARNER=LearnerOpts.DAGGER):
    print()
    print('Running test 1 with learner=%s' % LEARNER)
    print('=======================================================')

    n_states = 3
    n_actions = 2

    tRNN = TransitionRNN([mdp.MDPFeatures(n_states, noise_rate=0.5)],
                         [AttendAt(lambda _: 0, 's')], n_actions)
    policy = LinearPolicy(tRNN, n_actions)

    p_rollin_ref = stochastic(ExponentialAnnealing(0.99))
    p_rollout_ref = stochastic(ExponentialAnnealing(1))

    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)

    test_mdp, pi_ref = make_ross_mdp()

    if LEARNER == LearnerOpts.DAGGER:
        learner = lambda: DAgger(pi_ref, policy, p_rollin_ref)
    elif LEARNER == LearnerOpts.TWISTED:
        learner = lambda: TwistedDAgger(pi_ref, policy, p_rollin_ref)
    elif LEARNER == LearnerOpts.MAXLIK:
        learner = lambda: MaximumLikelihood(pi_ref, policy)
    elif LEARNER == LearnerOpts.AGGREVATE:
        learner = lambda: AggreVaTe(pi_ref, policy, p_rollin_ref)
    elif LEARNER == LearnerOpts.LOLS:
        learner = None

    losses = []
    for epoch in range(101):
        optimizer.zero_grad()
        if learner is not None:
            l = learner()
            env = test_mdp.mk_env()
            res = env.run_episode(l)
            loss = mdp.MDPLoss()(test_mdp, env)
            l.update(loss)
        elif LEARNER == LearnerOpts.LOLS:
            lols(test_mdp, mdp.MDPLoss, pi_ref, policy, p_rollin_ref,
                 p_rollout_ref)

        optimizer.step()
        p_rollin_ref.step()
        p_rollout_ref.step()

        env = test_mdp.mk_env()
        res = env.run_episode(policy)
        loss = mdp.MDPLoss()(test_mdp, env)
        losses.append(loss)
        if epoch % 20 == 0:
            print(epoch, sum(losses[-100:]) / len(losses[-100:]), '\t', res)
Example #4
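# Trains a sequence labeler on mod data with AggreVaTe, rolling in with an
# exponentially annealed probability of following the Hamming-loss reference.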
def test2():
    # AggreVaTe
    print()
    print('# test sequence labeler on mod data with AggreVaTe')
    n_types = 10
    n_labels = 4

    data = macarico.util.make_sequence_mod_data(100, 5, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]

    tRNN = TransitionRNN(
        [RNNFeatures(n_types)],
        [AttendAt()],
        n_labels,
    )
    policy = LinearPolicy(tRNN, n_labels)

    p_rollin_ref = stochastic(ExponentialAnnealing(0.99))
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)

    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: AggreVaTe(HammingLossReference(), policy,
                                  p_rollin_ref),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step],
        n_epochs=4,
        train_eval_skip=1,
    )
Example #5
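# Trains a sequence labeler on mod data with DAgger; routes RNNFeatures output
# through a custom field name ('mytok_rnn') that AttendAt reads, and builds the
# actor with Actor rather than TransitionRNN.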
def test0():
    print()
    print('# test sequence labeler on mod data with DAgger')
    n_types = 10
    n_labels = 4

    data = [
        Example(x, y, n_labels)
        for x, y in macarico.util.make_sequence_mod_data(
            100, 5, n_types, n_labels)
    ]

    tRNN = Actor([RNNFeatures(n_types, output_field='mytok_rnn')],
                 [AttendAt(field='mytok_rnn')], n_labels)
    policy = LinearPolicy(tRNN, n_labels)

    p_rollin_ref = stochastic(ExponentialAnnealing(0.99))
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)

    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: DAgger(HammingLossReference(), policy, p_rollin_ref),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step],
        n_epochs=4,
        train_eval_skip=1,
    )
Example #6
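# Trains a DAgger part-of-speech tagger on a small WSJ subset (50 train / 50 dev
# sentences) loaded with nlp_data.read_wsj_pos; expects the file data/wsj.pos.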
def test_wsj():
    print()
    print('# test on wsj subset')
    from macarico.data import nlp_data
    tr, de, te, vocab, label_id = \
        nlp_data.read_wsj_pos('data/wsj.pos', n_tr=50, n_de=50, n_te=0)

    n_types = len(vocab)
    n_labels = len(label_id)

    print('n_train: %s, n_dev: %s, n_test: %s' % (len(tr), len(de), len(te)))
    print('n_types: %s, n_labels: %s' % (n_types, n_labels))

    tRNN = TransitionRNN([RNNFeatures(n_types, rnn_type='RNN')], [AttendAt()],
                         n_labels)
    policy = LinearPolicy(tRNN, n_labels)

    p_rollin_ref = stochastic(ExponentialAnnealing(0.9))
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)

    macarico.util.trainloop(
        training_data=tr,
        dev_data=de,
        policy=policy,
        Learner=lambda: DAgger(HammingLossReference(), policy, p_rollin_ref),
        # Learner=lambda: MaximumLikelihood(HammingLossReference(), policy),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step],
        n_epochs=10,
        # train_eval_skip=None,
    )
Example #7
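    # Constructor fragment for a Learner that requires a CostSensitivePolicy;
    # it rolls in with the reference with annealed probability p_rollin_ref
    # (NoAnnealing(0) by default, i.e. never).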
    def __init__(self, policy, reference, p_rollin_ref=NoAnnealing(0)):
        macarico.Learner.__init__(self)
        assert isinstance(policy, CostSensitivePolicy)
        self.rollin_ref = stochastic(p_rollin_ref)
        self.policy = policy
        self.reference = reference
        self.objective = 0.0
Example #8
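    # Constructor fragment for a LearningAlg: stores the policy, reference, an
    # instantiated loss_fn, annealed roll-in/roll-out reference probabilities,
    # and the roll-out mixture mode (MIX_PER_ROLL by default).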
    def __init__(
            self,
            policy,
            reference,
            loss_fn,
            p_rollin_ref=NoAnnealing(0),
            p_rollout_ref=NoAnnealing(0.5),
            mixture=MIX_PER_ROLL,
    ):
        macarico.LearningAlg.__init__(self)
        self.policy = policy
        self.reference = reference
        self.loss_fn = loss_fn()
        self.rollin_ref = stochastic(p_rollin_ref)
        self.rollout_ref = stochastic(p_rollout_ref)
        self.mixture = mixture
        self.rollout = None
        self.true_costs = torch.zeros(self.policy.n_actions)
        self.warned_rollout_ref = False
Example #9
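# Same BanditLOLS setup as Example #1, but on length-1 sequences with two labels
# and a BootstrapPolicy built from a bag of five TransitionRNNs instead of a
# single LinearPolicy.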
def test1(learning_method, exploration):
    print()
    print('# testing learning_method=%d exploration=%d'
          % (learning_method, exploration))
    print()
    n_types = 10
    n_labels = 2
    data = macarico.util.make_sequence_mod_data(100, 1, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]

    bag_size = 5
    tRNN = [
        TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
        for i in range(bag_size)
    ]
    policy = BootstrapPolicy(tRNN, n_labels)
    # policy = LinearPolicy(tRNN[0], n_labels)
    # print('policy=', policy)
    # print('parameters=', list(policy.parameters()))
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)

    p_rollin_ref = stochastic(ExponentialAnnealing(0.9))
    p_rollout_ref = stochastic(ExponentialAnnealing(0.99999))

    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: BanditLOLS(
            HammingLossReference(),
            policy,
            p_rollin_ref,
            p_rollout_ref,
            learning_method,
            exploration,
        ),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_batch=[p_rollin_ref.step, p_rollout_ref.step],
        train_eval_skip=1,
        n_epochs=2,
    )
Example #10
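    # Constructor fragment (evidently BanditLOLS, given the LEARN_*/EXPLORE_*
    # checks): defaults to a uniformly random reference, the MTR update,
    # Boltzmann exploration, and per-roll mixing.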
    def __init__(
            self,
            policy,
            reference=None,
            p_rollin_ref=NoAnnealing(0),
            p_rollout_ref=NoAnnealing(0.5),
            update_method=LEARN_MTR,
            exploration=EXPLORE_BOLTZMANN,
            p_explore=NoAnnealing(1.0),
            mixture=LOLS.MIX_PER_ROLL,
    ):
        macarico.Learner.__init__(self)
        if reference is None:
            reference = lambda s: np.random.choice(list(s.actions))
        self.policy = policy
        self.reference = reference
        self.rollin_ref = stochastic(p_rollin_ref)
        self.rollout_ref = stochastic(p_rollout_ref)
        self.update_method = update_method
        self.exploration = exploration
        self.explore = stochastic(p_explore)
        self.mixture = mixture

        assert self.update_method in range(BanditLOLS._LEARN_MAX), \
            'unknown update_method, must be one of BanditLOLS.LEARN_*'
        assert self.exploration in range(BanditLOLS._EXPLORE_MAX), \
            'unknown exploration, must be one of BanditLOLS.EXPLORE_*'

        self.dev_t = None
        self.dev_a = None
        self.dev_actions = None
        self.dev_imp_weight = None
        self.dev_costs = None
        self.rollout = None
        self.t = None
        self.disallow = torch.zeros(self.policy.n_actions)
        self.truth = torch.zeros(self.policy.n_actions)
Example #11
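# Trains a sequence labeler with Reslope under bandit evaluation, comparing
# bootstrap exploration (a BootstrapPolicy over five RNNs) against Boltzmann
# exploration with a single LinearPolicy.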
def test1(use_bootstrap):
    n_types = 10
    n_labels = 4
    print()
    print('# test sequence labeler on mod data with Reslope and',
          'bootstrap' if use_bootstrap else 'boltzmann', 'exploration')
    data = macarico.util.make_sequence_mod_data(3000, 6, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]

    if not use_bootstrap:
        tRNN = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
        policy = LinearPolicy(tRNN, n_labels)
    else:
        rnns = [
            TransitionRNN([RNNFeatures(n_types)], [AttendAt()],
                          n_labels,
                          h_name='h%d' % i) for i in range(5)
        ]
        policy = BootstrapPolicy(rnns, n_labels)

    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)

    p_ref = stochastic(ExponentialAnnealing(0.9))

    macarico.util.trainloop(
        training_data=data[:2048],
        dev_data=data[2048:],
        policy=policy,
        Learner=lambda: Reslope(HammingLossReference(), policy, p_ref,
                                exploration=(BanditLOLS.EXPLORE_BOOTSTRAP
                                             if use_bootstrap else
                                             BanditLOLS.EXPLORE_BOLTZMANN)),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_ref.step],
        train_eval_skip=1,
        bandit_evaluation=True,
        n_epochs=1,
    )
Example #12
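    # Constructor fragment for a bandit learner (its signature matches the
    # Reslope call in Example #11): under MIX_PER_ROLL the reference/learned
    # roll-out choice is drawn once per episode, and per-step bookkeeping
    # (dev_t, dev_a, importance weights, costs) is kept in lists.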
    def __init__(self,
                 reference,
                 policy,
                 p_ref,
                 learning_method=BanditLOLS.LEARN_DR,
                 exploration=BanditLOLS.EXPLORE_BOLTZMANN,
                 explore=1.0,
                 mixture=BanditLOLS.MIX_PER_ROLL,
                 temperature=1.):
        self.reference = reference
        self.policy = policy
        self.learning_method = learning_method
        self.exploration = exploration
        self.temperature = temperature
        assert self.learning_method in range(BanditLOLS._LEARN_MAX), \
            'unknown learning_method, must be one of BanditLOLS.LEARN_*'
        assert self.exploration in range(BanditLOLS._EXPLORE_MAX), \
            'unknown exploration, must be one of BanditLOLS.EXPLORE_*'

        if mixture == BanditLOLS.MIX_PER_ROLL:
            use_ref = p_ref()
            self.use_ref = lambda: use_ref
        else:
            self.use_ref = p_ref
        if isinstance(explore, float):
            explore = stochastic(NoAnnealing(explore))
        self.explore = explore
        self.t = None
        self.dev_t = []
        self.dev_a = []
        self.dev_actions = []
        self.dev_imp_weight = []
        self.dev_costs = []
        self.squared_loss = 0.
        self.pred_act_cost = []

        macarico.Learner.__init__(self)
Example #13
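    # Constructor fragment like Example #7, but without the CostSensitivePolicy
    # assertion.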
    def __init__(self, policy, reference, p_rollin_ref=NoAnnealing(0)):
        macarico.Learner.__init__(self)
        self.rollin_ref = stochastic(p_rollin_ref)
        self.policy = policy
        self.reference = reference
        self.objective = 0.0
Example #14
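# Compares several learners (DAgger, TwistedDAgger, MaximumLikelihood,
# AdvantageActorCritic, Reinforce, BanditLOLS) on a choice of synthetic tasks:
# sequence reversal (easy / hard / multi-focus), add-one-mod-K labeling, or
# "Matti-style" data.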
def test1(task=0, LEARNER=LearnerOpts.DAGGER):
    print()
    print('Running test 1 (v%d) with learner=%s' % (task, LEARNER))
    print('=======================================================')

    if task == 0:
        print('Sequence reversal task, easy version')
        data = macarico.util.make_sequence_reversal_data(100, 5, 5)
        foci = [AttendAt(lambda s: s.N - s.n - 1)]
    elif task == 1:
        print('Sequence reversal task, hard version')
        data = macarico.util.make_sequence_reversal_data(1000, 5, 5)
        foci = [AttendAt()]
    elif task == 2:
        print('Sequence reversal task, multi-focus version')
        data = macarico.util.make_sequence_reversal_data(100, 5, 5)
        foci = [AttendAt(), AttendAt(lambda s: s.N - s.n - 1)]
    elif task == 3:
        print('Memoryless task, add-one mod K')
        data = macarico.util.make_sequence_mod_data(50, 5, 10, 3)
        foci = [AttendAt()]
    elif task == 4:
        print('Matti-style data')
        data = make_matti_data(1000, 20, 2, 0.05)
        foci = [AttendAt()]

    n_types = 1 + max({x for X, _ in data for x in X})
    n_labels = 1 + max({y for _, Y in data for y in Y})

    data = [Example(x, y, n_labels) for x, y in data]

    random.shuffle(data)
    m = len(data) // 2
    train = data[:m]
    dev = data[m:]

    print('n_train: %s, n_dev: %s' % (len(train), len(dev)))
    print('n_types: %s, n_labels: %s' % (n_types, n_labels))
    print('learner:', LEARNER)
    print()

    tRNN = Actor([RNNFeatures(n_types)], foci, n_labels)
    policy = LinearPolicy(tRNN, n_labels)

    baseline = EWMA(0.8)
    p_rollin_ref = stochastic(ExponentialAnnealing(0.5))
    p_rollout_ref = stochastic(ExponentialAnnealing(0.5))

    if LEARNER == LearnerOpts.AC:
        from macarico.lts.reinforce import AdvantageActorCritic, LinearValueFn
        baseline = LinearValueFn(policy.features)
        policy.vfa = baseline  # adds the value-function params to the policy via nn.Module

    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)

    if LEARNER == LearnerOpts.DAGGER:
        learner = lambda: DAgger(HammingLossReference(), policy, p_rollin_ref)
    elif LEARNER == LearnerOpts.TWISTED:
        learner = lambda: TwistedDAgger(HammingLossReference(), policy,
                                        p_rollin_ref)
    elif LEARNER == LearnerOpts.MAXLIK:
        learner = lambda: MaximumLikelihood(HammingLossReference(), policy)
    elif LEARNER == LearnerOpts.AC:
        learner = lambda: AdvantageActorCritic(policy, baseline)
    elif LEARNER == LearnerOpts.REINFORCE:
        learner = lambda: Reinforce(policy, baseline)
    elif LEARNER == LearnerOpts.BANDITLOLS:
        learner = lambda: BanditLOLS(HammingLossReference(), policy,
                                     p_rollin_ref, p_rollout_ref,
                                     BanditLOLS.LEARN_DR,
                                     BanditLOLS.EXPLORE_UNIFORM, baseline)

    macarico.util.trainloop(
        training_data=train,
        dev_data=dev,
        policy=policy,
        Learner=learner,
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step, p_rollout_ref.step],
        n_epochs=10,
        train_eval_skip=1,
    )