Example #1
    def train(self, env, args):

        # Instantiate one environment per thread
        envs = [
            AtariEnvironment(args, self.env_dim) for i in range(args.n_threads)
        ]
        state_dim = envs[0].get_state_size()
        action_dim = envs[0].get_action_size()

        # Create threads
        tqdm_e = tqdm(range(int(args.nb_episodes)),
                      desc='Score',
                      leave=True,
                      unit=" episodes")

        threads = [
            threading.Thread(target=training_thread,
                             daemon=True,
                             args=(self, args.nb_episodes, envs[i], action_dim,
                                   args.training_interval, tqdm_e, args.render,
                                   i)) for i in range(args.n_threads)
        ]

        for t in threads:
            t.start()
            time.sleep(0.5)
        try:
            [t.join() for t in threads]
        except KeyboardInterrupt:
            print("Exiting all threads...")
        return None
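
Note: the `training_thread` target that these threads spawn is not shown in the examples. As a rough orientation only, a worker matching the argument tuple used above could look like the sketch below; the shared `episode` counter and the agent methods `policy_action` and `train_models` are assumptions for illustration, not the repository's actual code.

# Hypothetical A3C worker matching the args tuple
# (agent, Nmax, env, action_dim, f, tqdm_e, render, thread_id).
episode = 0  # shared episode counter (assumed to live at module level)

def training_thread(agent, Nmax, env, action_dim, f, tqdm_e, render, thread_id):
    # action_dim would be used to one-hot encode actions in a full implementation.
    global episode
    while episode < Nmax:
        old_state, done, cumul_reward = env.reset(), False, 0
        states, actions, rewards = [], [], []
        while not done:
            if render and thread_id == 0:
                env.render()
            a = agent.policy_action(old_state)  # sample from the shared policy
            new_state, r, done, _ = env.step(a)
            states.append(old_state)
            actions.append(a)
            rewards.append(r)
            cumul_reward += r
            old_state = new_state
            # Update the shared networks every f steps or at episode end
            if len(states) == f or done:
                agent.train_models(states, actions, rewards, done)
                states, actions, rewards = [], [], []
        episode += 1
        tqdm_e.set_description("Score: " + str(cumul_reward))
        tqdm_e.update(1)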
Example #2
    def train(self, env, args, summary_writer):

        # Instantiate one environment per thread
        if(args.is_atari):
            envs = [AtariEnvironment(args) for i in range(args.n_threads)]
            state_dim = envs[0].get_state_size()
            action_dim = envs[0].get_action_size()
        else:
            envs = [Environment(gym.make(args.env), args.consecutive_frames) for i in range(args.n_threads)]
            [e.reset() for e in envs]
            state_dim = envs[0].get_state_size()
            action_dim = gym.make(args.env).action_space.n

        # Create threads
        factor = 100.0 / (args.nb_episodes)
        tqdm_e = tqdm(range(args.nb_episodes), desc='Score', leave=True, unit=" episodes")

        threads = [threading.Thread(
                target=training_thread,
                args=(self,
                    args.nb_episodes,
                    envs[i],
                    action_dim,
                    args.training_interval,
                    summary_writer,
                    tqdm_e,
                    factor)) for i in range(args.n_threads)]

        for t in threads:
            t.start()
            time.sleep(1)
        [t.join() for t in threads]

        return None
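
The worker threads in this variant also receive the TensorBoard `summary_writer`. With the TF1 summary API they could record a per-episode scalar roughly as sketched below; the helper and the tag name are assumptions for illustration.

import tensorflow as tf

def log_score(summary_writer, score, episode):
    # Write one scalar value to TensorBoard using the TF1-style summary API.
    summary = tf.Summary(value=[tf.Summary.Value(tag="score", simple_value=score)])
    summary_writer.add_summary(summary, global_step=episode)
    summary_writer.flush()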
Example #3
    def train(self, env, args, summary_writer):

        # Instantiate one environment per thread
        if (args.is_ai2thor):
            config_dict = {'max_episode_length': 500}
            envs = [
                AI2ThorEnv(config_dict=config_dict)
                for i in range(args.n_threads)
            ]
            env.reset()
            state = envs[0].reset()
            state_dim = state.shape
            action_dim = envs[0].action_space.n
        elif (args.is_atari):
            envs = [AtariEnvironment(args) for i in range(args.n_threads)]
            state_dim = envs[0].get_state_size()
            action_dim = envs[0].get_action_size()
        else:
            envs = [
                Environment(gym.make(args.env), args.consecutive_frames)
                for i in range(args.n_threads)
            ]
            [e.reset() for e in envs]
            state_dim = envs[0].get_state_size()
            action_dim = gym.make(args.env).action_space.n

        # Create threads
        tqdm_e = tqdm(range(int(args.nb_episodes)),
                      desc='Score',
                      leave=True,
                      unit=" episodes")

        threads = [
            threading.Thread(target=training_thread,
                             daemon=True,
                             args=(self, args.nb_episodes, envs[i], action_dim,
                                   args.training_interval, summary_writer,
                                   tqdm_e, args.render))
            for i in range(args.n_threads)
        ]

        for t in threads:
            t.start()
            time.sleep(0.5)
        try:
            [t.join() for t in threads]
        except KeyboardInterrupt:
            print("Exiting all threads...")
        return None
Example #4
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    if args.wandb:
        wandb.init(entity=args.wandb_id, project=args.wandb_project)

    # Environment Initialization
    if (args.is_atari):
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif (args.type == "DDPG"):
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm to train
    if (args.type == "DDQN"):
        algo = DDQN(action_dim, state_dim, args)
    elif (args.type == "A2C"):
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif (args.type == "A3C"):
        algo = A3C(action_dim,
                   state_dim,
                   args.consecutive_frames,
                   is_atari=args.is_atari)
    elif (args.type == "DDPG"):
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args)  # rows of (episode, mean score, score stddev)

    # Export results to CSV
    if (args.gather_stats):
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir, args.type,
                                                      args.env,
                                                      args.nb_episodes,
                                                      args.batch_size)

    algo.save_weights(export_path)
    env.env.close()
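
Since `main()` falls back to `sys.argv[1:]` when called with no arguments, a script like this is normally finished off with the standard entry-point guard (an assumption; the guard itself is not shown in the examples):

if __name__ == "__main__":
    main()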
Example #5
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    if(args.is_atari):
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif(args.type=="DDPG"):
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm to train
    if(args.type=="DDQN"):
        algo = DDQN(action_dim, state_dim, args)
        algo.load_weights(args.model_path)
    elif(args.type=="A2C"):
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    elif(args.type=="A3C"):
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
        algo.load_weights(args.actor_path, args.critic_path)
    elif(args.type=="DDPG"):
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)

    # Display agent
    old_state, time = env.reset(), 0
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done:
            old_state = env.reset()  # restart from a fresh state, not the terminal one

    env.env.close()
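
Several of these scripts call `set_session(get_session())` before building any models, but the helpers themselves are not shown. A plausible minimal sketch, consistent with the commented-out GPU setup in Example #6 below (an assumption, not the repository's actual helper), registers a TF1 session that allocates GPU memory on demand:

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

def get_session():
    # TF1 session that grows GPU memory as needed instead of grabbing it all at once.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    return tf.Session(config=config)

set_session(get_session())  # make this the session Keras uses for its models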
Example #6
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    # if args.gpu:
    #     os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    # sess = get_session()
    # set_session(sess)
    # K.set_session(sess)
    # with tf.device('/gpu:0'):
    #     config = tf.ConfigProto()
    #     config.gpu_options.allow_growth = True
    #     sess = tf.Session(config=config)
    #     K.set_session(sess)
    #     set_session(sess)
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" +
                                           args.env)

    # Environment Initialization
    if (args.is_atari):
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif (args.type == "DDPG"):
        # Continuous Environments Wrapper
        # env = Environment(gym.make(args.env), args.consecutive_frames)
        # env.reset()
        # state_dim = env.get_state_size()
        # action_space = gym.make(args.env).action_space
        # action_dim = action_space.high.shape[0]
        # act_range = action_space.high

        env_before = gym.make(args.env)
        env_unwrapped = env_before.unwrapped
        # env_unwrapped.observation_space = env_unwrapped.observation_shape
        # state_dim = env_unwrapped.observation_space.shape
        # action_dim = env_unwrapped.action_space.n
        env = Environment(env_before, args.consecutive_frames)
        env.reset()

        state_dim = env.get_state_size()
        # action_space = env.action_space
        # action_dim = env.get_action_size()
        action_dim = env_unwrapped.action_space.shape[0]
        act_range = env_unwrapped.action_space.high
        print('state: ', state_dim)
        print('action: ', action_dim)
        print('act range', act_range)
    else:
        # Standard Environments
        # env = Environment(gym.make(args.env), args.consecutive_frames)
        # env.reset()
        # state_dim = env.get_state_size()
        # action_dim = gym.make(args.env).action_space.n

        #unreal
        env_before = gym.make(args.env)
        # env_unwrapped = env_before.unwrapped
        # env_unwrapped.observation_space = env_unwrapped.observation_shape
        # state_dim = env_unwrapped.observation_space.shape
        # action_dim = env_unwrapped.action_space.n
        env = Environment(env_before, args.consecutive_frames)
        env.reset()

        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
        print('state: ', state_dim)
        print('action: ', action_dim)

        # state_dim= (640,380)
        # action_dim =3

    # Pick algorithm to train
    print('args type: ', args.type)
    if (args.type == "DDQN"):
        algo = DDQN(action_dim, state_dim, args)
    elif (args.type == "A2C"):
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif (args.type == "A3C"):
        algo = A3C(action_dim,
                   state_dim,
                   args.consecutive_frames,
                   is_atari=args.is_atari)
    elif (args.type == "DDPG"):
        algo = DDPG(args, action_dim, state_dim, act_range,
                    args.consecutive_frames)

    if args.pretrain:
        print('pretrain')
        algo.load_weights(args.weights_path)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if (args.gather_stats):
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir, args.type,
                                                      args.env,
                                                      args.nb_episodes,
                                                      args.batch_size)

    algo.save_weights(export_path)
    env.env.close()
Example #7
    # this is the metric your models will be evaluated on
    cumulative_avg_reward = cumulative_reward / episode

    # log cumulative_avg_reward over all episodes played so far
    wandb.log({'cumulative_avg_reward': cumulative_avg_reward})

    return cumulative_avg_reward


from numpy.random import seed
from tensorflow import set_random_seed

args = sys.argv[1:]
args = parse_args(args)

env = AtariEnvironment(args, (32, 32), test=True)
state_dim = env.get_state_size()
action_dim = env.get_action_size()

action_size = env.action_space.n
print("Actions available(%d): %r" %
      (env.action_space.n, env.env.get_action_meanings()))

# initialize a new wandb run
wandb.init(project="qualcomm-evaluation")

# define hyperparameters
wandb.config.episodes = 100
wandb.config.runpath = run_path

for seed_ in [10]:  #, 50, 100, 200, 500]:
Example #8
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if(args.is_atari):
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif(args.type=="DDPG"):
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    # Pick algorithm to train
    if(args.type=="DDQN"):
        algo = DDQN(action_dim, state_dim, args)
    elif(args.type=="A2C"):
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif(args.type=="A3C"):
        algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari)
    elif(args.type=="DDPG"):
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if(args.gather_stats):
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv", header=['Episode', 'Mean', 'Stddev'], float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir,
        args.type,
        args.env,
        args.nb_episodes,
        args.batch_size)

    algo.save_weights(export_path)
    env.env.close()
Example #9
def main(args=None):

    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" +
                                           args.env)

    # Environment Initialization
    if (args.is_atari):
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif (args.type == "DDPG"):
        # Continuous Environments Wrapper
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym.make(args.env).action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high

    else:
        if args.env == 'cell':
            # Custom optical-tweezers environment with hard-coded dimensions
            env = Environment(opticalTweezers(), args.consecutive_frames)
            # env = opticalTweezers(consecutive_frames=args.consecutive_frames)
            env.reset()
            state_dim = (6, )
            action_dim = 4  # note: the reshape code must be changed for a 2D agent
        else:
            # Standard Environments
            env = Environment(gym.make(args.env), args.consecutive_frames)
            env.reset()
            state_dim = env.get_state_size()
            print(state_dim)
            action_dim = gym.make(args.env).action_space.n
            print(action_dim)
    # Pick algorithm to train
    if (args.type == "DDQN"):
        algo = DDQN(action_dim, state_dim, args)
    elif (args.type == "A2C"):
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif (args.type == "A3C"):
        algo = A3C(action_dim,
                   state_dim,
                   args.consecutive_frames,
                   is_atari=args.is_atari)
    elif (args.type == "DDPG"):
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if (args.gather_stats):
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Display agent
    old_state, time = env.reset(), 0
    # all_old_states=[old_state for i in range(args.consecutive_frames)]
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done: old_state = env.reset()  # restart from a fresh state, not the terminal one
Example #10
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)
    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    if args.is_ai2thor:
        config_dict = {'max_episode_length': 2000}
        env = AI2ThorEnv(config_dict=config_dict)
        env.reset()
        state = env.reset()
        state_dim = state.shape
        action_dim = env.action_space.n
    elif (args.is_atari):
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n

    algo = A3C(action_dim, state_dim, args.consecutive_frames, is_atari=args.is_atari, is_ai2thor=args.is_ai2thor)
    algo.load_weights(args.actor_path, args.critic_path)

    # Display agent
    old_state, time = env.reset(), 0
    while True:
        a = algo.policy_action(old_state)
        old_state, r, done, _ = env.step(a)
        time += 1
        if done:
            print('----- done, resetting env -----')
            old_state = env.reset()  # restart from a fresh state, not the terminal one
Example #11
def main(args=None):
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    # Environment Initialization
    if args.is_ai2thor:
        config_dict = {'max_episode_length': 500}
        env = AI2ThorEnv(config_dict=config_dict)
        env.reset()
        state = env.reset()
        state_dim = state.shape
        action_dim = env.action_space.n
        args.env = 'ai2thor'
    elif (args.is_atari):
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
        print(state_dim)
        print(action_dim)
    else:
        # Standard Environments
        env = Environment(gym.make(args.env), args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym.make(args.env).action_space.n
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(args.type + "/tensorboard_" +
                                           args.env)
    algo = A3C(action_dim,
               state_dim,
               args.consecutive_frames,
               is_atari=args.is_atari,
               is_ai2thor=args.is_ai2thor)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(exp_dir, args.type,
                                                      args.env,
                                                      args.nb_episodes,
                                                      args.batch_size)

    algo.save_weights(export_path)
    env.close()