def main(args=None):
    """Load a trained agent and render it acting in its environment forever.

    The environment wrapper is chosen from ``args.is_atari`` / ``args.type``,
    the matching algorithm is built, its weights are loaded from the paths in
    ``args``, and the agent is then stepped in an endless render loop.

    Args:
        args: Argument list handed to ``parse_args``. Defaults to
            ``sys.argv[1:]`` when ``None``.

    Raises:
        ValueError: If ``args.type`` is not DDQN, A2C, A3C or DDPG.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        # Build the gym environment once and reuse it for the action space
        # instead of instantiating a second, never-closed copy.
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym_env.action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym_env.action_space.n

    # Pick algorithm to run and restore its weights
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
        algo.load_weights(args.model_path)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames,
                   is_atari=args.is_atari)
        algo.load_weights(args.actor_path, args.critic_path)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
        algo.load_weights(args.actor_path, args.critic_path)
    else:
        # Fail with a clear message instead of an unbound `algo` further down.
        raise ValueError(
            "Unsupported algorithm type: {!r}".format(args.type))

    # Display agent
    old_state = env.reset()
    try:
        while True:
            env.render()
            a = algo.policy_action(old_state)
            old_state, _, done, _ = env.step(a)
            if done:
                # Start the next episode from the freshly reset observation,
                # not from the terminal state of the previous episode.
                old_state = env.reset()
    finally:
        # The loop only ends through an exception (e.g. Ctrl-C); make sure the
        # underlying environment is still closed in that case.
        env.env.close()
def main(args=None):
    """Load trained A3C weights and run the agent in its environment forever.

    Supports an AI2Thor environment (``args.is_ai2thor``), the Atari wrapper
    (``args.is_atari``) or a standard gym environment named by ``args.env``.

    Args:
        args: Argument list handed to ``parse_args``. Defaults to
            ``sys.argv[1:]`` when ``None``.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())

    # Environment Initialization
    if args.is_ai2thor:
        config_dict = {'max_episode_length': 2000}
        env = AI2ThorEnv(config_dict=config_dict)
        # One reset is enough to obtain an observation to read the shape from.
        state = env.reset()
        state_dim = state.shape
        action_dim = env.action_space.n
    elif args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    else:
        # Standard Environments
        # Build the gym environment once and reuse it for the action space
        # instead of instantiating a second, never-closed copy.
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym_env.action_space.n

    algo = A3C(action_dim, state_dim, args.consecutive_frames,
               is_atari=args.is_atari, is_ai2thor=args.is_ai2thor)
    algo.load_weights(args.actor_path, args.critic_path)

    # Display agent
    old_state = env.reset()
    while True:
        a = algo.policy_action(old_state)
        old_state, _, done, _ = env.step(a)
        if done:
            print('----- done, resetting env ----')
            # Continue from the reset observation rather than the terminal
            # state of the episode that just finished.
            old_state = env.reset()
def main(args=None):
    """Train the selected algorithm, optionally log stats, and save weights.

    Args:
        args: Argument list handed to ``parse_args``. Defaults to
            ``sys.argv[1:]`` when ``None``.

    Raises:
        ValueError: If ``args.type`` is not DDQN, A2C, A3C or DDPG.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    if args.wandb:
        wandb.init(entity=args.wandb_id, project=args.wandb_project)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        # Build the gym environment once and reuse it for the action space
        # instead of instantiating a second, never-closed copy.
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym_env.action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym_env.action_space.n

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames,
                   is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
    else:
        # Fail with a clear message instead of an unbound `algo` further down.
        raise ValueError(
            "Unsupported algorithm type: {!r}".format(args.type))

    # Train
    stats = algo.train(env, args)  # e, mean, stdev: e is episode

    # Export results to CSV
    if args.gather_stats:
        # to_csv does not create missing parent directories, so make sure the
        # per-algorithm directory exists before writing into it.
        os.makedirs(args.type, exist_ok=True)
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    os.makedirs(exp_dir, exist_ok=True)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(
        exp_dir, args.type, args.env, args.nb_episodes, args.batch_size)
    algo.save_weights(export_path)
    env.env.close()
def main(args=None):
    """Train the selected algorithm with TensorBoard logging and save weights.

    Optionally restores weights from ``args.weights_path`` before training
    when ``args.pretrain`` is set.

    Args:
        args: Argument list handed to ``parse_args``. Defaults to
            ``sys.argv[1:]`` when ``None``.

    Raises:
        ValueError: If ``args.type`` is not DDQN, A2C, A3C or DDPG.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # NOTE: unlike sibling entry points, this variant performs no GPU
    # selection or Keras/TF session setup; that code was disabled here.
    summary_writer = tf.summary.FileWriter(
        args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        env_before = gym.make(args.env)
        # Action-space properties are read from the unwrapped environment.
        env_unwrapped = env_before.unwrapped
        env = Environment(env_before, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = env_unwrapped.action_space.shape[0]
        act_range = env_unwrapped.action_space.high
        print('state: ', state_dim)
        print('action: ', action_dim)
        print('act range', act_range)
    else:
        # Standard Environments
        env_before = gym.make(args.env)
        env = Environment(env_before, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
        print('state: ', state_dim)
        print('action: ', action_dim)

    # Pick algorithm to train
    print('args type: ', args.type)
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames,
                   is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(args, action_dim, state_dim, act_range,
                    args.consecutive_frames)
    else:
        # Fail with a clear message instead of an unbound `algo` further down.
        raise ValueError(
            "Unsupported algorithm type: {!r}".format(args.type))

    # NOTE(review): applied to every algorithm type here; confirm this was not
    # meant to be limited to the DDPG branch.
    if args.pretrain:
        print('pretrain')
        algo.load_weights(args.weights_path)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    os.makedirs(exp_dir, exist_ok=True)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(
        exp_dir, args.type, args.env, args.nb_episodes, args.batch_size)
    algo.save_weights(export_path)
    env.env.close()
cumulative_avg_reward = cumulative_reward / episode # log cumulative_avg_reward over all episodes played so far wandb.log({'cumulative_avg_reward': cumulative_avg_reward}) return cumulative_avg_reward from numpy.random import seed from tensorflow import set_random_seed args = sys.argv[1:] args = parse_args(args) env = AtariEnvironment(args, (32, 32), test=True) state_dim = env.get_state_size() action_dim = env.get_action_size() action_size = env.action_space.n print("Actions available(%d): %r" % (env.action_space.n, env.env.get_action_meanings())) # initialize a new wandb run wandb.init(project="qualcomm-evaluation") # define hyperparameters wandb.config.episodes = 100 wandb.config.runpath = run_path for seed_ in [10]: #, 50, 100, 200, 500]: seed(seed_)
def main(args=None):
    """Train the selected algorithm with TensorBoard logging and save weights.

    Args:
        args: Argument list handed to ``parse_args``. Defaults to
            ``sys.argv[1:]`` when ``None``.

    Raises:
        ValueError: If ``args.type`` is not DDQN, A2C, A3C or DDPG.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(
        args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        # Build the gym environment once and reuse it for the action space
        # instead of instantiating a second, never-closed copy.
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym_env.action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    else:
        # Standard Environments
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym_env.action_space.n

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames,
                   is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
    else:
        # Fail with a clear message instead of an unbound `algo` further down.
        raise ValueError(
            "Unsupported algorithm type: {!r}".format(args.type))

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    os.makedirs(exp_dir, exist_ok=True)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(
        exp_dir, args.type, args.env, args.nb_episodes, args.batch_size)
    algo.save_weights(export_path)
    env.env.close()
def main(args=None):
    """Train the selected algorithm, then render the trained agent forever.

    Besides the Atari and gym environments, ``args.env == 'cell'`` selects the
    ``opticalTweezers`` environment with hard-coded state/action dimensions.

    Args:
        args: Argument list handed to ``parse_args``. Defaults to
            ``sys.argv[1:]`` when ``None``.

    Raises:
        ValueError: If ``args.type`` is not DDQN, A2C, A3C or DDPG.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    set_session(get_session())
    summary_writer = tf.summary.FileWriter(
        args.type + "/tensorboard_" + args.env)

    # Environment Initialization
    if args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
    elif args.type == "DDPG":
        # Continuous Environments Wrapper
        # Build the gym environment once and reuse it for the action space
        # instead of instantiating a second, never-closed copy.
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_space = gym_env.action_space
        action_dim = action_space.high.shape[0]
        act_range = action_space.high
    elif args.env == 'cell':
        # Custom optical-tweezers environment with fixed dimensions.
        env = Environment(opticalTweezers(), args.consecutive_frames)
        env.reset()
        state_dim = (6, )
        # NOTE (original author): the reshape code has to change for a 2d
        # agent; this value should be 4.
        action_dim = 4
    else:
        # Standard Environments
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        print(state_dim)
        action_dim = gym_env.action_space.n
        print(action_dim)

    # Pick algorithm to train
    if args.type == "DDQN":
        algo = DDQN(action_dim, state_dim, args)
    elif args.type == "A2C":
        algo = A2C(action_dim, state_dim, args.consecutive_frames)
    elif args.type == "A3C":
        algo = A3C(action_dim, state_dim, args.consecutive_frames,
                   is_atari=args.is_atari)
    elif args.type == "DDPG":
        algo = DDPG(action_dim, state_dim, act_range, args.consecutive_frames)
    else:
        # Fail with a clear message instead of an unbound `algo` further down.
        raise ValueError(
            "Unsupported algorithm type: {!r}".format(args.type))

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Display agent
    old_state = env.reset()
    while True:
        env.render()
        a = algo.policy_action(old_state)
        old_state, _, done, _ = env.step(a)
        if done:
            # Continue from the reset observation rather than the terminal
            # state of the episode that just finished.
            old_state = env.reset()
def main(args=None):
    """Train an A3C agent with TensorBoard logging and save its weights.

    Supports an AI2Thor environment (``args.is_ai2thor``), the Atari wrapper
    (``args.is_atari``) or a standard gym environment named by ``args.env``.
    For AI2Thor, ``args.env`` is overwritten with ``'ai2thor'`` so that the
    log directory and export path carry that name.

    Args:
        args: Argument list handed to ``parse_args``. Defaults to
            ``sys.argv[1:]`` when ``None``.
    """
    # Parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # Check if a GPU ID was set
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # Environment Initialization
    if args.is_ai2thor:
        config_dict = {'max_episode_length': 500}
        env = AI2ThorEnv(config_dict=config_dict)
        # One reset is enough to obtain an observation to read the shape from.
        state = env.reset()
        state_dim = state.shape
        action_dim = env.action_space.n
        args.env = 'ai2thor'
    elif args.is_atari:
        # Atari Environment Wrapper
        env = AtariEnvironment(args)
        state_dim = env.get_state_size()
        action_dim = env.get_action_size()
        print(state_dim)
        print(action_dim)
    else:
        # Standard Environments
        # Build the gym environment once and reuse it for the action space
        # instead of instantiating a second, never-closed copy.
        gym_env = gym.make(args.env)
        env = Environment(gym_env, args.consecutive_frames)
        env.reset()
        state_dim = env.get_state_size()
        action_dim = gym_env.action_space.n

    set_session(get_session())
    summary_writer = tf.summary.FileWriter(
        args.type + "/tensorboard_" + args.env)

    algo = A3C(action_dim, state_dim, args.consecutive_frames,
               is_atari=args.is_atari, is_ai2thor=args.is_ai2thor)

    # Train
    stats = algo.train(env, args, summary_writer)

    # Export results to CSV
    if args.gather_stats:
        df = pd.DataFrame(np.array(stats))
        df.to_csv(args.type + "/logs.csv",
                  header=['Episode', 'Mean', 'Stddev'],
                  float_format='%10.5f')

    # Save weights and close environments
    exp_dir = '{}/models/'.format(args.type)
    os.makedirs(exp_dir, exist_ok=True)

    export_path = '{}{}_ENV_{}_NB_EP_{}_BS_{}'.format(
        exp_dir, args.type, args.env, args.nb_episodes, args.batch_size)
    algo.save_weights(export_path)
    env.close()