def learn_option(option, environment_name, num_episodes, max_steps):
    """
    :param source: the source community
    :type source: int
    :param target: the target community
    :param target: int
    """
    from pyrl.agents.sarsa_lambda import sarsa_lambda
    from pyrl.rlglue import RLGlueLocal as RLGlueLocal
    from pyrl.environments.pinball import PinballRLGlue
    import numpy as np
    import logging
    import pyflann
    import options
    import cPickle
    import random
    import csv

    prefix = 'option-%d-to-%d'%(option.label, option.target)
    # Start a fresh per-episode score log; rows are appended after each episode below
    open(prefix + '-score.csv', 'wb').close()

    # Create the learning agent
    agent = sarsa_lambda(epsilon=0.01, alpha=0.001, gamma=0.9, lmbda=0.9,
                         params={'name': 'fourier', 'order': 4})

    # Wrap the environment with the option's pseudo-reward
    environment = options.TrajectoryRecorder(
        options.PseudoRewardEnvironment(PinballRLGlue(environment_name),
                                        option, 10000),
        prefix + '-trajectory')

    # Connect to RL-Glue
    rlglue = RLGlueLocal.LocalGlue(environment, agent)
    rlglue.RL_init()

    # Execute episodes
    if not num_episodes:
        num_episodes = np.alen(option.initial_states)
        print 'Learning %d episodes'%(num_episodes,)

    for i in xrange(num_episodes):
        initial_state = option.initial_state()
        rlglue.RL_env_message('set-start-state %f %f %f %f' %
                              (initial_state[0], initial_state[1],
                               initial_state[2], initial_state[3]))

        terminated = rlglue.RL_episode(max_steps)

        total_steps = rlglue.RL_num_steps()
        total_reward = rlglue.RL_return()

        with open(prefix + '-score.csv', 'a') as f:
            writer = csv.writer(f)
            writer.writerow([i, total_steps, total_reward, terminated])

    rlglue.RL_cleanup()

    # Save function approximation
    option.basis = agent.basis
    option.weights = agent.weights[0,:,:]

    with open(prefix + '-policy.pl', 'wb') as f:
        cPickle.dump(option, f)

    return option
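

# Usage note (a hedged sketch, not a confirmed API): learn_option expects
# `option` to expose integer `label` and `target` attributes, an
# `initial_states` collection, and an `initial_state()` sampler returning a
# 4-component Pinball state, as used above. Such objects are presumably
# built by the accompanying `options` module; the configuration name below
# is an assumption. A call might look roughly like:
#
#     learn_option(option, 'pinball_hard_single.cfg',
#                  num_episodes=100, max_steps=10000)
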
def learn_policy(environment_name, nepisodes, max_steps, prefix):
    """Learn a flat Sarsa(lambda) policy on a Pinball environment.

    :param environment_name: name of the Pinball configuration to use
    :type environment_name: str
    :param nepisodes: number of training episodes
    :type nepisodes: int
    :param max_steps: maximum number of steps per episode
    :type max_steps: int
    :param prefix: prefix for the trajectory, score and pickle output files
    :type prefix: str
    """
    from pyrl.agents.sarsa_lambda import sarsa_lambda
    from pyrl.rlglue import RLGlueLocal as RLGlueLocal
    from pyrl.environments.pinball import PinballRLGlue
    from options import TrajectoryRecorder
    import cPickle
    import csv

    # Create the learning agent
    agent = sarsa_lambda(epsilon=0.01, alpha=0.001, gamma=1.0, lmbda=0.9,
                         params={'name': 'fourier', 'order': 4})

    # Wrap the environment to record trajectories (no pseudo-reward here)
    environment = TrajectoryRecorder(PinballRLGlue(environment_name), prefix + '-trajectory')

    score_handle = open(prefix + '-scores.csv', 'wb')
    score_file = csv.writer(score_handle)

    # Connect to RL-Glue
    rlglue = RLGlueLocal.LocalGlue(environment, agent)
    rlglue.RL_init()

    # Execute episodes
    scores = []
    for i in xrange(nepisodes):
        print 'Episode ', i
        terminated = rlglue.RL_episode(max_steps)
        total_steps = rlglue.RL_num_steps()
        total_reward = rlglue.RL_return()

        print '\t%d steps, return %.2f, terminated: %d' % (total_steps, total_reward, terminated)
        score = [i, total_steps, total_reward, terminated]
        scores.append(score)
        score_file.writerow(score)

    rlglue.RL_cleanup()
    score_handle.close()

    # Persist the trained agent
    with open(prefix + '.pl', 'wb') as f:
        cPickle.dump(agent, f)

    return scores
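

# Illustrative driver sketch, assuming the standard pyrl Pinball setup:
# train a flat policy and report the last episode's score. The
# configuration name and output prefix below are assumptions, not values
# taken from the original listing.
if __name__ == '__main__':
    scores = learn_policy('pinball_hard_single.cfg',
                          nepisodes=100,
                          max_steps=10000,
                          prefix='flat-policy')
    print 'Finished %d episodes, last score: %s' % (len(scores), scores[-1])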