import gym_selfx.selfx
import gym
import argparse

from gym import wrappers, logger

parser = argparse.ArgumentParser()
parser.add_argument("-n", type=int, default=1000, help="number of epochs of training")
opt = parser.parse_args()

if __name__ == '__main__':
    logger.set_level(logger.INFO)

    env = gym.make('selfx-billard-v0')

    outdir = 'results/selfx-billard'
    env = wrappers.Monitor(env, directory=outdir, force=True)
    env.seed(0)

    game = env.game

    episode_count = opt.n
    reward = 0
    done = False

    for i in range(episode_count):
        ob = env.reset()
        while True:
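            # The original snippet is cut off here; the lines below are a
            # minimal sketch of the usual Gym step loop, with a random action
            # standing in for the (unshown) selfx policy:
            action = env.action_space.sample()
            ob, reward, done, _ = env.step(action)
            if done:
                break

    env.close()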
        th_std = elite_ths.std(axis=0)
        yield {'ys': ys, 'theta_mean': th_mean, 'y_mean': ys.mean()}

def do_rollout(agent, env, num_steps, render=False):
    total_rew = 0
    ob = env.reset()
    for t in range(num_steps):
        a = agent.act(ob)
        (ob, reward, done, _info) = env.step(a)
        total_rew += reward
        if render and t % 3 == 0:
            env.render()
        if done:
            break
    return total_rew, t + 1

if __name__ == '__main__':
    logger.set_level(logger.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('--display', action='store_true')
    parser.add_argument('target', nargs="?", default="CartPole-v0")
    args = parser.parse_args()

    env = gym.make(args.target)
    env.seed(0)
    np.random.seed(0)
    params = dict(n_iter=10, batch_size=25, elite_frac=0.2)
    num_steps = 200

    # You provide the directory to write to (can be an existing
    # directory, but can't contain previous monitor results). You can
    # also dump to a tempdir if you'd like: tempfile.mkdtemp().
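    # A quick usage sketch for do_rollout: any object exposing act(ob) works
    # as the agent. The random policy below is purely illustrative and not
    # part of the original script:
    class _RandomPolicy:
        def __init__(self, action_space):
            self.action_space = action_space

        def act(self, ob):
            return self.action_space.sample()

    total_rew, steps = do_rollout(_RandomPolicy(env.action_space), env,
                                  num_steps, render=args.display)
    print('rollout reward: %.2f over %d steps' % (total_rew, steps))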
""" Plays num_games games of Pong with actions directed by QLearningAlgorithm's getAction function, which is a function approximation by linear regression of a dictionary of features of the current state and actions. """ class QLearningAgent(object): def __init__(self, action_space): self.action_space = action_space def act(self, observation, reward, done, q): return q.getAction(observation, done) if __name__ == '__main__': logger.set_level(logger.WARN) #Opens a Pong environment env = gym.make('Pong-v0') #directory to output game statistics outdir = 'tmp/results' env = wrappers.Monitor(env, directory=outdir, force=True) env.seed(0) agent = QLearningAgent(env.action_space) num_games = 301 reward = 0 done = False score_list = [] q = QLearningAlgorithm([0, 2, 3],
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

# set up matplotlib
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

plt.ion()

# OpenAI Gym related
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(40)  # errors only

import glob
import io
import base64
from IPython.display import HTML

# set up OpenAI Gym rendering in Colab
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display

_display = Display(visible=False,  # use False with Xvfb
                   size=(1400, 900))
_ = _display.start()

# if gpu is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
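# The glob/io/base64/HTML imports above are typically paired with a helper
# that plays back the recordings gym.wrappers.Monitor writes to disk. A
# minimal sketch of that pattern; the 'video' directory is an assumption,
# not taken from the snippet above:
def show_video(video_dir='video'):
    mp4list = glob.glob(video_dir + '/*.mp4')
    if not mp4list:
        print('Could not find video')
        return
    with io.open(mp4list[0], 'r+b') as f:
        encoded = base64.b64encode(f.read())
    ipythondisplay.display(HTML(
        '<video autoplay loop controls style="height: 400px;">'
        '<source src="data:video/mp4;base64,{}" type="video/mp4" />'
        '</video>'.format(encoded.decode('ascii'))))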
# Remove when using installed mpc.pytorch
import sys
sys.path.append('..')

import logging
import math
import time

import gym
import numpy as np
import torch
import torch.autograd
from gym import wrappers, logger as gym_log
from mpc import mpc

gym_log.set_level(gym_log.INFO)
logger = logging.getLogger(__name__)
logging.basicConfig(
    level=logging.DEBUG,
    format='[%(levelname)s %(asctime)s %(pathname)s:%(lineno)d] %(message)s',
    datefmt='%m-%d %H:%M:%S')

if __name__ == "__main__":
    ENV_NAME = "Pendulum-v0"
    TIMESTEPS = 10  # T
    N_BATCH = 1
    LQR_ITER = 5
    ACTION_LOW = -2.0
    ACTION_HIGH = 2.0

    class PendulumDynamics(torch.nn.Module):
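        # The class body is cut off above; the forward below is a sketch of
        # the classic Pendulum-v0 update (state = [theta, theta_dot]; g=10,
        # m=1, l=1, dt=0.05). The actual mpc.pytorch example may differ in
        # state layout:
        def forward(self, state, action):
            th = state[:, 0].view(-1, 1)
            thdot = state[:, 1].view(-1, 1)
            g, m, l, dt = 10.0, 1.0, 1.0, 0.05
            u = torch.clamp(action, ACTION_LOW, ACTION_HIGH)
            newthdot = thdot + (-3 * g / (2 * l) * torch.sin(th + math.pi)
                                + 3.0 / (m * l ** 2) * u) * dt
            newthdot = torch.clamp(newthdot, -8.0, 8.0)
            newth = th + newthdot * dt
            return torch.cat((newth, newthdot), dim=1)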
with open(args['grid'], 'r') as grid_file:
    grid_config = yaml.safe_load(grid_file)  # safe_load avoids executing arbitrary tags

# Overwrite config
# NOTE: get() falls back to the default value when a key is not in grid_config
config = {key: grid_config.get(key, config[key]) for key in config}

# Build parameter grid
params = list(ParameterGrid(config))

# Start clock
start = time.time()
print(f'About to evaluate {len(params)} parameter sets')

# Disable logger
logger.set_level(logger.DISABLED)

# Multiprocessing pool
pool = mp.Pool(processes=mp.cpu_count())

# Run
final_scores = pool.map(eval_single, list(enumerate(params)))

# Close
pool.close()
pool.join()

# Finished!
print(f'Execution time: {(time.time() - start) / 3600:.2f} hours')

# Create recording directory if it doesn't exist
                    type=int, default=1000,
                    help='maximum number of episodes to run')
parser.add_argument('--verbose', action='store_true',
                    help='output verbose logging for steps')
parser.add_argument('--random_action', action='store_true',
                    help='random policy for comparison')
parser.add_argument('--gamma', type=float, default=1.0, metavar='G',
                    help='discount factor (default: 1.0)')
parser.add_argument('--learning_rate', type=float, default=0.5,
                    help='learning rate (default: 0.5)')
parser.add_argument('--seed', type=int, metavar='N',
                    help='random seed')
args = parser.parse_args()

logger.set_level(logger.INFO)
if args.verbose:
    logger.set_level(logger.DEBUG)

# Set the random seed if defined
if args.seed:
    random.seed(args.seed)

# Run the training
main(args)
def wrap_env(env, task_name, logger_level=logger.INFO):
    logger.set_level(logger_level)
    outdir = os.path.join(ROOT_DIR, 'logs/' + task_name + '-results')
    return wrappers.Monitor(env, directory=outdir, force=True)
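# A quick usage sketch for wrap_env; the environment id and task name are
# placeholders for illustration, not taken from the function above:
if __name__ == '__main__':
    env = wrap_env(gym.make('CartPole-v0'), 'cartpole-demo')
    env.reset()
    env.close()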
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(40)  # errors only

import tensorflow as tf
import time
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.layers import Reshape
from keras.layers import Conv2D
from keras.layers import Flatten
from keras.layers import InputLayer  # InputLayer lives in keras.layers, not keras.models
from keras.optimizers import Adam

import math
import glob
import io
import base64

from IPython import display as ipythondisplay
from pyvirtualdisplay import Display

# set up and run the environment
def run():
    # set up display recording (start the configured display rather than
    # creating and starting a second, default-sized one)
    display = Display(visible=0, size=(1400, 900))
    display.start()
import argparse
import sys

import gym
from gym import wrappers, logger

class RandomAgent(object):
    """An agent that samples actions uniformly at random."""
    def __init__(self, action_space):
        self.action_space = action_space

    def act(self, observation, reward, done):
        return self.action_space.sample()

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('env_id', nargs='?', default='CartPole-v0',
                        help='Select the environment to run')
    args = parser.parse_args()

    logger.set_level(logger.INFO)

    env = gym.make(args.env_id)

    outdir = '/tmp/random-agent-results'
    env = wrappers.Monitor(env, directory=outdir, video_callable=False, force=True)
    env.seed(0)
    agent = RandomAgent(env.action_space)

    episode_count = 100
    reward = 0
    done = False

    for i in range(episode_count):
        ob = env.reset()
        while True:
            action = agent.act(ob, reward, done)
            ob, reward, done, _ = env.step(action)
            if done:
                break
    zero_completed_obs = np.zeros((NUM_CPU,) + ob_shape)
    zero_completed_obs[0, :] = test_env.reset()

    state = None
    for _ in range(L):
        action, state = model.predict(zero_completed_obs, state=state, deterministic=True)
        zero_completed_obs[0, :], reward, done, _ = test_env.env_method('step', action[0], indices=0)[0]

    sharpe_ratios.append(test_env.env_method('get_sharpe_ratio', indices=0)[0])

    if plot:
        test_env.env_method('render', indices=0)

    test_env.close()

    # Return the average Sharpe ratio
    return sum(sharpe_ratios) / len(sharpe_ratios)

if __name__ == '__main__':
    logger.set_level(logger.ERROR)

    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str)
    # store_true matches the "drop this flag" semantics described in the help
    # strings; argparse's type=bool would treat any non-empty string as True
    parser.add_argument('--use_sigmoid_layer', action='store_true',
                        help='Whether or not to use SigmoidMlpPolicy. Drop this flag to use MlpPolicy.')
    parser.add_argument('--optimize', action='store_true',
                        help='Search for optimal hyperparameters. Drop this flag to run the actual training.')
    parser.add_argument('--num_trials', type=int, default=10,
                        help='Number of trials to search for optimal hyperparameters.')
    parser.add_argument('--evaluation_epochs', type=int, default=10,
                        help='Number of epochs the model runs when evaluating hyperparameters.')
    parser.add_argument('--evaluate_model_per_epochs', type=int, default=10,
                        help='How often to evaluate the model during training.')
    parser.add_argument('--max_train_epochs', type=int, default=1000,
                        help='Max number of epochs that the model runs during training.')
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(30)  # warnings and errors only

import numpy as np
import random
import math
import glob
import io
import os
import cv2
import base64
import argparse
import time
import imageio

import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt

from collections import deque
from datetime import datetime
from numpy.random import seed
from tensorflow import set_random_seed
from setproctitle import setproctitle as ptitle

# NOTE: this Monitor comes from atari_wrappers and shadows gym.wrappers.Monitor
from atari_wrappers import make_atari, wrap_deepmind, Monitor
from a2c import Agent
from neural_network import CNN
def run_bench():
    logger.set_level(logger.INFO)

    rewards = {name: [] for name in agents_list}
    regrets = {name: [] for name in agents_list}
    brs = {name: [] for name in agents_list}

    for i in range(nb_exp):
        print(f'exp {i}')
        for _ in range(nb_episodes):
            env.env.reset()
            for agent_name in agents_list:
                agent = agents_list[agent_name]
                agent = agent.reset()  # assumes reset() returns the re-initialized agent
                ob = env.reset()
                step = 0
                reward = 0
                reward_record = [0]
                regret_record = [0]
                br_record = [0]
                done = False
                while True:
                    step += 1
                    action = agent.act(ob, reward, done)
                    ob, reward, done, _ = env.step(action)
                    best_reward = env.env.get_best_reward()
                    if done:
                        break
                    reward_record.append(reward_record[step - 1] + reward)
                    regret_record.append(regret_record[step - 1] + best_reward - reward)
                    br_record.append(best_reward)
                rewards[agent_name].append(reward_record)
                regrets[agent_name].append(regret_record)
                brs[agent_name].append(br_record)

    env.env.close()

    plt.figure()
    for agent_name in rewards:
        x = np.mean(rewards[agent_name], axis=0)
        plt.plot(x, label=f'{agent_name}')
    plt.title('Cumulative Score')
    plt.legend()
    plt.savefig(env_name + '_Score')
    plt.show()

    plt.figure()
    for agent_name in regrets:
        x = np.mean(regrets[agent_name], axis=0)
        plt.plot(x, label=f'{agent_name}')
    plt.title('Cumulative Regret')
    plt.legend()
    plt.savefig(env_name + '_Regret')
    plt.show()

    plt.figure()
    plt.plot(np.mean(brs['Random Agent'], axis=0))
    plt.title('Best Reward')
    plt.savefig(env_name + '_Best_Reward')
    plt.show()