Example No. 1
def run_test(agent, num_exp=100):

    # set up environment
    env = envs.make(args.env)

    all_total_reward = []

    # run experiment
    for ep in range(num_exp):

        env.set_random_seed(100000000 + ep)
        env.reset()

        total_reward = 0

        state = env.observe()
        done = False

        while not done:
            act = agent.get_action(state)
            state, reward, done = env.step(act)
            total_reward += reward

        all_total_reward.append(total_reward)

    return all_total_reward
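
The helper above only relies on a small environment/agent contract. The sketch below illustrates that contract with hypothetical stand-ins; DummyEnv and RandomAgent are not part of the project, whose real classes come from the envs package and the agent implementation.

# Hypothetical stand-ins illustrating the interface run_test relies on:
# set_random_seed/reset/observe/step on the env and get_action on the agent.
import random

class DummyEnv:
    def set_random_seed(self, seed):
        self._rng = random.Random(seed)

    def reset(self):
        self._steps_left = 5

    def observe(self):
        return self._steps_left

    def step(self, action):
        self._steps_left -= 1
        reward = self._rng.random()
        done = self._steps_left <= 0
        return self.observe(), reward, done

class RandomAgent:
    def get_action(self, state):
        return random.randrange(2)

env, agent = DummyEnv(), RandomAgent()
env.set_random_seed(0)
env.reset()
state, done, total_reward = env.observe(), False, 0.0
while not done:
    state, reward, done = env.step(agent.get_action(state))
    total_reward += reward
print(total_reward)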
Example No. 2
def __init__(self, **kwargs):
    self.config = kwargs
    #self.env = environments.make('ContinuousThor-v0', goals = ['laptop'], scenes = list(range(201, 230)))
    self.env = environments.make('House-v0',
                                 scene='00cfe094634578865b4384f3adef49e6',
                                 goals=['kitchen'])  #, goals = ['living_room'])
    self.obs = self.env.reset()
Example No. 3
def create_envs(num_training_processes, env_kwargs):
    def wrap(env):
        env = RewardCollector(env)
        env = TransposeImage(env)
        env = ScaledFloatFrame(env)
        env = UnrealEnvBaseWrapper(env)
        return env

    thunk = lambda: wrap(environments.make(**env_kwargs))
    env = SubprocVecEnv([thunk for _ in range(num_training_processes)])
    return env, None
Example No. 4
def create_envs(num_training_processes, tasks, **env_kwargs):
    def wrap(env):
        env = RewardCollector(env)
        env = TransposeImage(env)
        env = ScaledFloatFrame(env)
        env = UnrealEnvBaseWrapper(env)
        return env

    # bind scene/goal via default arguments so each closure keeps its own task
    # (otherwise every worker would build the last (scene, goal) of the loop)
    env_fns = [lambda scene=scene, goal=goal: wrap(environments.make(graph_name=scene, goals=goal, **env_kwargs))
               for (scene, goals) in tasks for goal in goals]
    env = SubprocVecEnv(env_fns)
    env.set_hardness = lambda hardness: env.call_unwrapped('set_complexity', hardness)
    env.set_hardness(0.1)
    #env.set_hardness(1.0)
    return env
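
A hypothetical call showing how this variant might be used. The task list, the scene names, and the keyword arguments are illustrative assumptions, and running it requires the project's environments package and SubprocVecEnv.

# Illustrative only: one worker process per (scene, goal) pair.
tasks = [('thor-cached-225', ['kitchen']),
         ('thor-cached-230', ['kitchen', 'living_room'])]
venv = create_envs(num_training_processes=3, tasks=tasks, screen_size=(84, 84))
obs = venv.reset()       # batched observations, one per worker
venv.set_hardness(0.3)   # curriculum: raise scene complexity over training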
Example No. 5
    def __init__(self, config):
        """
        Initializes class with the configuration.
        """
        self._config = config
        self._is_chef = config.is_chef

        # create a new environment
        self._env = make("PandaGrasp", config)
        ob_space = self._env.observation_space  # e.g. OrderedDict([('object-state', [10]), ('robot-state', [36])])
        ac_space = self._env.action_space  # e.g. ActionSpace(shape=OrderedDict([('default', 8)]),minimum=-1.0, maximum=1.0)
        print('***', ac_space)

        # get actor and critic networks
        actor, critic = MlpActor, MlpCritic

        # build the networks for the chosen agent (SAC or PPO)
        if self._config.algo == 'sac':
            self._agent = SACAgent(config, ob_space, ac_space, actor, critic)
        else:
            self._agent = PPOAgent(config, ob_space, ac_space, actor, critic)

        # build rollout runner
        self._runner = RolloutRunner(config, self._env, self._agent)

        # setup log
        if self._is_chef and self._config.is_train:
            exclude = ['device']
            if not self._config.wandb:
                os.environ['WANDB_MODE'] = 'dryrun'

            # Weights and Biases (wandb) is used for logging; set the account
            # details below, or rely on the dry-run mode configured above.
            entity = 'panda'  # user or team name
            project = 'robo'  # project name

            wandb.init(resume=config.run_name,
                       project=project,
                       config={
                           k: v
                           for k, v in config.__dict__.items()
                           if k not in exclude
                       },
                       dir=config.log_dir,
                       entity=entity,
                       notes=config.notes)
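
The wandb.init call logs a filtered copy of the configuration. Below is a minimal, self-contained illustration of that filtering pattern, using a plain argparse.Namespace as a stand-in for the trainer's config object.

# Stand-in config; the real object is whatever the trainer receives as `config`.
import argparse

config = argparse.Namespace(algo='sac', lr=3e-4, device='cuda:0')
exclude = ['device']
logged = {k: v for k, v in config.__dict__.items() if k not in exclude}
print(logged)  # {'algo': 'sac', 'lr': 0.0003} -- 'device' is excluded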
Example No. 6
def main(args):
    BATCH_SIZE = args.batch_size
    MAX_EPSILON = args.max_epsilon
    MIN_EPSILON = args.min_epsilon
    decay = args.decay
    gamma = args.gamma

    env_name = args.env_name
    if env_name in ['MountainCar-v0']:
        env = gym.make(env_name)
        num_states = env.env.observation_space.shape[0]
        num_actions = env.env.action_space.n
    else:
        env = environments.make(env_name)
        num_states = env.get_num_states()
        num_actions = env.get_num_actions()


    model = fully_connected.Model(num_states, num_actions, BATCH_SIZE, layer_sizes=[10,10])
    mem = helpers.Memory(1000)

    config = tf.ConfigProto(
        device_count = {'GPU': 0}
    )
    saver = tf.train.Saver()
    model_save_dir = os.path.join('.', 'saved_models', datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
    os.makedirs(model_save_dir, exist_ok=True)
    with tf.Session(config=config) as sess:
        sess.run(model.var_init)
        gr = helpers.GameRunner(sess, model, env, mem, MAX_EPSILON, MIN_EPSILON,
                        decay, gamma)
        num_episodes = 300
        cnt = 0
        while cnt < num_episodes:
            if cnt % 50 == 0:
                print('Episode {} of {}'.format(cnt+1, num_episodes))
                gr._render = True
                gr.run()
                save_path = saver.save(sess, os.path.join(model_save_dir,"model_{:05d}.ckpt".format(cnt)))
                print("Model saved in path: %s" % save_path)
            else:
                gr._render = True
                gr.run()
            cnt += 1
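
MAX_EPSILON, MIN_EPSILON, and decay suggest an exponential exploration schedule inside helpers.GameRunner; the exact formula is not shown here, so the sketch below is only an assumption about what those parameters typically control.

# A common epsilon-decay schedule consistent with the arguments above
# (an assumption; helpers.GameRunner may implement it differently).
import math

def epsilon(step, max_eps=1.0, min_eps=0.01, decay=0.001):
    return min_eps + (max_eps - min_eps) * math.exp(-decay * step)

print(epsilon(0))      # starts at max_eps
print(epsilon(5000))   # decays toward min_eps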
Example No. 7
def record_videos(agent, path, screen_size):
    seed = 1
    for scene, tasks in EXPERIMENTS:
        env = environments.make('GoalHouse-v1',
                                screen_size=screen_size,
                                scene=scene,
                                goals=None)
        env = RenderVideoWrapper(env, path)
        env = agent.wrap_env(env)
        env.seed(seed)
        for task in tasks:
            agent.reset_state()
            if task is not None:
                env.unwrapped.set_next_task(task)
            obs = env.reset()
            if task is None:
                print(env.unwrapped.state)
            done = False
            while not done:
                obs, _, done, _ = env.step(agent.act(obs))
Example No. 8
import environments

if __name__ == '__main__':
    #from graph.util import load_graph
    #env = environments.make('AuxiliaryGraph-v0', graph_file = '/home/jonas/.visual_navigation/scenes/thor-cached-225.pkl')
    #env.unwrapped.browse().show()

    # env = environments.make('CachedThor-v0', goals = [], h5_file_path = 'test.h5') #, goals = [], scenes = 311)
    # env.unwrapped.browse().show()

    env = environments.make('AuxiliaryThor-v1', goals=[], scenes=311, enable_noise=True)
    env.unwrapped.browse().show()
Example No. 9
        'A_VisitsDecay_3c6267913c894118a9e60bc796652cc7',
        'A_UCB_d9546ca2809d43e4b8b5be4a5d5c33ac',
        'A_UCB_341235ec2ac7476981f9be3281506c7a',
        'A_DecayRate_2923aed748004df4819f142b64433e8e',
        'A_AlwaysGreedy_3cab8bb2ecd94981bd57c46af642e9f8'
    ]
    if value in exceps:
        return False
    else:
        return True


OTHERS = list(filter(exceptions, list_saved_agents(filter='unique')))
AGENTS = list(TOP)
env = environments.make('hitstand')


def get_features(route):
    def translate_null(value, lr=False):
        if value:
            return value

        else:
            if lr:
                return '1/visits'

            else:
                return 'N/A'

    def rename(feats_dict):
Example No. 10
def training_agent(agent_id, params_queue, reward_queue, adv_queue,
                   gradient_queue):
    np.random.seed(args.seed)  # for environment
    tf.set_random_seed(agent_id)  # for model evolving

    sess = tf.Session()

    # set up actor agent for training
    actor_agent = ActorAgent(sess)
    critic_agent = CriticAgent(sess, input_dim=args.num_workers + 2)

    # set up environment
    env = envs.make(args.env)

    # collect experiences
    while True:
        # get parameters from master
        (actor_params, critic_params, entropy_weight, model_idx) = \
            params_queue.get()

        # synchronize model parameters
        actor_agent.set_params(actor_params)
        critic_agent.set_params(critic_params)

        # reset environment
        env.set_random_seed(model_idx)
        env.reset()

        # set up training storage
        batch_inputs, batch_act_vec, batch_wall_time, batch_reward = \
            [], [], [], []

        # run experiment
        state = env.observe()
        done = False

        while not done:

            # decompose state (for storing information)
            workers, job, curr_time = state

            inputs = np.zeros([1, args.num_workers + 1])
            for worker in workers:
                # normalize the amount of queued work on each worker
                inputs[0, worker.worker_id] = min(
                    sum(j.size for j in worker.queue) / args.job_size_norm_factor / 5.0,
                    20.0)
            # normalize the size of the incoming job
            inputs[0, -1] = min(job.size / args.job_size_norm_factor, 10.0)

            # draw an action
            action = actor_agent.predict(inputs)[0]

            # store input and action
            batch_inputs.append(inputs)

            act_vec = np.zeros([1, args.num_workers])
            act_vec[0, action] = 1

            batch_act_vec.append(act_vec)

            # store wall time
            batch_wall_time.append(curr_time)

            # interact with environment
            state, reward, done = env.step(action)

            # scale reward for training
            reward /= args.reward_scale

            # store reward
            batch_reward.append(reward)

        # store final time
        batch_wall_time.append(env.wall_time.curr_time)

        # compute all values
        value_inputs = np.zeros([len(batch_inputs), args.num_workers + 2])
        for i in range(len(batch_inputs)):
            value_inputs[i, :-1] = batch_inputs[i]
            value_inputs[i, -1] = batch_wall_time[i] / float(batch_wall_time[-1])
        batch_values = critic_agent.predict(value_inputs)

        # summarize more info for master agent
        unfinished_jobs = sum(len(worker.queue) for worker in env.workers)
        unfinished_jobs += sum(worker.curr_job is not None
                               for worker in env.workers)

        finished_work = sum(j.size for j in env.finished_jobs)
        unfinished_work = 0
        for worker in env.workers:
            for j in worker.queue:
                unfinished_work += j.size
            if worker.curr_job is not None:
                unfinished_work += worker.curr_job.size

        average_job_duration = np.mean(
            [j.finish_time - j.arrival_time for j in env.finished_jobs])

        # report rewards to master agent
        reward_queue.put([
            batch_reward,
            np.array(batch_values), batch_wall_time,
            len(env.finished_jobs), unfinished_jobs, finished_work,
            unfinished_work, average_job_duration
        ])

        # get advantage term
        batch_adv, batch_actual_value = adv_queue.get()

        # compute gradients
        actor_gradient, loss = actor_agent.compute_gradients(
            batch_inputs, batch_act_vec, batch_adv, entropy_weight)
        critic_gradient, _ = critic_agent.compute_gradients(
            value_inputs, batch_actual_value)

        # send back gradients
        gradient_queue.put([actor_gradient, critic_gradient, loss])

    sess.close()
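
The worker receives (batch_adv, batch_actual_value) from the master via adv_queue; the master's aggregation is not shown in this example, so the sketch below only illustrates one common way such targets are formed (discounted returns minus the critic's values).

# Illustrative only: discounted returns and advantages as the master
# process might compute them; not the project's actual logic.
import numpy as np

def discounted_returns(rewards, gamma=0.99):
    returns = np.zeros(len(rewards))
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

batch_reward = [1.0, 0.0, 0.5]
batch_values = np.array([0.8, 0.4, 0.3])
batch_actual_value = discounted_returns(batch_reward)
batch_adv = batch_actual_value - batch_values
print(batch_actual_value, batch_adv)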
Example No. 11
def main():

    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    # create result and model folder
    create_folder_if_not_exists(args.result_folder)

    # different agents for different environments
    if args.env == 'load_balance':
        schemes = ['shortest_processing_time', 'learn']
    else:
        schemes = ['learn']

    # tensorflow session
    sess = tf.Session()

    # store results
    all_performance = {scheme: [] for scheme in schemes}

    # create environment
    env = envs.make(args.env)

    # initialize all agents
    agents = {}
    for scheme in schemes:

        if scheme == 'learn':
            agents[scheme] = ActorAgent(sess)
            # saver for loading trained model
            saver = tf.train.Saver(max_to_keep=args.num_saved_models)
            # initialize parameters
            sess.run(tf.global_variables_initializer())
            # load trained model
            if args.saved_model is not None:
                saver.restore(sess, args.saved_model)

        elif scheme == 'least_work':
            agents[scheme] = LeastWorkAgent()

        elif scheme == 'shortest_processing_time':
            agents[scheme] = ShortestProcessingTimeAgent()

        else:
            print('invalid scheme', scheme)
            exit(1)

    # store results
    all_performance = {}

    # plot job duration cdf
    fig = plt.figure()
    title = 'average: '

    for scheme in schemes:

        all_total_reward = run_test(agents[scheme], num_exp=args.num_ep)

        all_performance[scheme] = all_total_reward

        x, y = compute_CDF(all_total_reward)
        plt.plot(x, y)

        title += ' ' + scheme + ' '
        title += '%.2f' % np.mean(all_total_reward)

    plt.xlabel('Total reward')
    plt.ylabel('CDF')
    plt.title(title)
    plt.legend(schemes)

    fig.savefig(args.result_folder + \
        args.env + '_all_performance.png')
    plt.close(fig)

    # save all job durations
    np.save(args.result_folder + \
        args.env + '_all_performance.npy', \
        all_performance)

    sess.close()
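
compute_CDF is not defined in this example; a typical empirical-CDF helper consistent with how it is used above might look like the following (an assumption, not the project's implementation).

# Hypothetical helper: empirical CDF over a list of total rewards.
import numpy as np

def compute_CDF(values):
    x = np.sort(np.asarray(values, dtype=float))   # sorted reward values
    y = np.arange(1, len(x) + 1) / float(len(x))   # cumulative fraction of runs
    return x, y

x, y = compute_CDF([3.0, 1.0, 2.0, 2.0])
# x = [1.0, 2.0, 2.0, 3.0], y = [0.25, 0.5, 0.75, 1.0]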
Example No. 12
    parser = argparse.ArgumentParser(
        description='Deep reactive agent scene explorer.')
    parser.add_argument('--h5_file_path',
                        type=str,
                        default='/app/data/{scene}.h5')
    parser.add_argument('--unity_path', type=str)

    parser.add_argument('--scene',
                        help='Scene to run the explorer on',
                        default='bedroom_04',
                        type=str)

    args = vars(parser.parse_args())

    from experiments.data import TRAIN, VALIDATION
    env = environments.make(
        'GoalHouse-v1',
        screen_size=(500, 500),
        scene=['0b6d4fe900eaddd80aecf4bc79248dd9']
    )  #['b814705bc93d428507a516b866efda28','e3ae3f7b32cf99b29d3c8681ec3be321','5f3f959c7b3e6f091898caa8e828f110'])

    #from environments.gym_house.video import RenderVideoWrapper
    #env = RenderVideoWrapper(env, '')
    '''
    208,
    212
    '''

    #env = environments.make('AuxiliaryGraph-v0', goals = (5, 6, 2), graph_name = 'thor-cached-225') #  graph_file = 'kitchen.pkl')
    GoalKeyboardAgent(env).show()
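
The default h5_file_path above is a template string; presumably the '{scene}' placeholder is filled in with the chosen scene name, for example:

# Illustrative only: expanding the '{scene}' placeholder in h5_file_path.
h5_file_path = '/app/data/{scene}.h5'
print(h5_file_path.format(scene='bedroom_04'))  # /app/data/bedroom_04.h5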
Example No. 13
def main():

    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    # create result and model folder
    create_folder_if_not_exists(args.result_folder)

    # different agents for different environments
    if args.env == 'load_balance':
        schemes = ['shortest_processing_time', 'learn']
    else:
        print('Schemes for ' + args.env + ' do not exist')
        exit(1)

    # tensorflow session
    sess = tf.Session()

    # store results
    all_performance = {scheme: [] for scheme in schemes}

    # create environment
    env = envs.make(args.env)

    # initialize all agents
    agents = {}
    for scheme in schemes:

        if scheme == 'learn':
            agents[scheme] = ActorAgent(sess)
            # saver for loading trained model
            saver = tf.train.Saver(max_to_keep=args.num_saved_models)
            # initialize parameters
            sess.run(tf.global_variables_initializer())
            # load trained model
            if args.saved_model is not None:
                saver.restore(sess, args.saved_model)

        elif scheme == 'least_work':
            agents[scheme] = LeastWorkAgent()

        elif scheme == 'shortest_processing_time':
            agents[scheme] = ShortestProcessingTimeAgent()

        else:
            print('invalid scheme', scheme)
            exit(1)

    # run testing experiments
    for ep in range(args.num_ep):

        for scheme in schemes:

            # reset the environment with controlled seed
            env.set_random_seed(ep)
            env.reset()

            # pick agent
            agent = agents[scheme]

            # store total reward
            total_reward = 0

            # -- run the environment --
            t1 = time.time()

            state = env.observe()
            done = False

            while not done:
                action = agent.get_action(state)
                state, reward, done = env.step(action)
                total_reward += reward

            t2 = time.time()
            print('Elapsed', scheme, t2 - t1, 'seconds')

            all_performance[scheme].append(total_reward)

        # plot job duration cdf
        fig = plt.figure()

        title = 'average: '

        for scheme in schemes:
            x, y = compute_CDF(all_performance[scheme])
            plt.plot(x, y)

            title += ' ' + scheme + ' '
            title += '%.2f' % np.mean(all_performance[scheme])

        plt.xlabel('Total reward')
        plt.ylabel('CDF')
        plt.title(title)
        plt.legend(schemes)

        fig.savefig(args.result_folder + \
            args.env + '_all_performance.png')
        plt.close(fig)

        # save all job durations
        np.save(args.result_folder + \
            args.env + '_all_performance.npy', \
            all_performance)

    sess.close()