Example #1
import gym_selfx.selfx

import gym
import argparse

from gym import wrappers, logger


parser = argparse.ArgumentParser()
parser.add_argument("-n", type=int, default=1000, help="number of epochs of training")
opt = parser.parse_args()


if __name__ == '__main__':

    logger.set_level(logger.INFO)

    env = gym.make('selfx-billard-v0')

    outdir = 'results/selfx-billard'
    env = wrappers.Monitor(env, directory=outdir, force=True)
    env.seed(0)
    game = env.game

    episode_count = opt.n
    reward = 0
    done = False

    for i in range(episode_count):
        ob = env.reset()
        while True:
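The excerpt stops inside the episode loop. As a rough sketch of how such a loop usually continues, assuming some `agent` object with an `act(ob, reward, done)` method (the agent is not part of the excerpt above):

            action = agent.act(ob, reward, done)   # hypothetical agent, not shown above
            ob, reward, done, _ = env.step(action)
            if done:
                break

    env.close()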
Example #2
File: cem.py  Project: joschu/gym
        th_std = elite_ths.std(axis=0)
        yield {'ys' : ys, 'theta_mean' : th_mean, 'y_mean' : ys.mean()}

def do_rollout(agent, env, num_steps, render=False):
    total_rew = 0
    ob = env.reset()
    for t in range(num_steps):
        a = agent.act(ob)
        (ob, reward, done, _info) = env.step(a)
        total_rew += reward
        if render and t%3==0: env.render()
        if done: break
    return total_rew, t+1

if __name__ == '__main__':
    logger.set_level(logger.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('--display', action='store_true')
    parser.add_argument('target', nargs="?", default="CartPole-v0")
    args = parser.parse_args()

    env = gym.make(args.target)
    env.seed(0)
    np.random.seed(0)
    params = dict(n_iter=10, batch_size=25, elite_frac=0.2)
    num_steps = 200

    # You provide the directory to write to (it can be an existing
    # directory, but it can't contain previous monitor results). You can
    # also dump to a tempdir if you'd like: tempfile.mkdtemp().
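The comment above explains the Monitor output directory for this cem.py example; a minimal sketch of the wrapping it refers to, assuming `from gym import wrappers` and `import tempfile` (the directory choice is illustrative, not taken from the original file):

    outdir = tempfile.mkdtemp()   # or a fixed path such as 'results/cem'
    env = wrappers.Monitor(env, directory=outdir, force=True)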
"""
Plays num_games games of Pong with actions directed by QLearningAlgorithm's getAction function, which is a function approximation by linear regression of a dictionary of features
of the current state and actions. 
"""


class QLearningAgent(object):
    def __init__(self, action_space):
        self.action_space = action_space

    def act(self, observation, reward, done, q):
        return q.getAction(observation, done)


if __name__ == '__main__':
    logger.set_level(logger.WARN)

    # Opens a Pong environment
    env = gym.make('Pong-v0')

    # Directory to output game statistics
    outdir = 'tmp/results'
    env = wrappers.Monitor(env, directory=outdir, force=True)
    env.seed(0)
    agent = QLearningAgent(env.action_space)

    num_games = 301
    reward = 0
    done = False
    score_list = []
    q = QLearningAlgorithm([0, 2, 3],
Example #4

import torch
import matplotlib
import matplotlib.pyplot as plt
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

# set up matplotlib
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

plt.ion()

# Open AI related
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(40)  # error only

import glob
import io
import base64
from IPython.display import HTML

# set up OpenAi Gym render in Colab
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display
_display = Display(visible=False,  # use False with Xvfb
                   size=(1400, 900))
_ = _display.start()

# if gpu is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
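The glob/io/base64/HTML imports above are typically paired with a helper that embeds the Monitor's recorded .mp4 files in the notebook output. The original helper is not shown in this excerpt; a minimal sketch under that assumption (the video directory name is illustrative):

def show_video(video_dir='video'):
    # Embed the most recent recording produced by the Monitor wrapper.
    mp4list = glob.glob(f'{video_dir}/*.mp4')
    if not mp4list:
        print('Could not find any video files.')
        return
    with io.open(sorted(mp4list)[-1], 'r+b') as f:
        encoded = base64.b64encode(f.read())
    ipythondisplay.display(HTML(
        '<video autoplay loop controls style="height: 400px;">'
        '<source src="data:video/mp4;base64,{}" type="video/mp4" /></video>'
        .format(encoded.decode('ascii'))))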
Example #5
# Remove when using installed mpc.pytorch
import sys
sys.path.append('..')

import logging
import math
import time

import gym
import numpy as np
import torch
import torch.autograd
from gym import wrappers, logger as gym_log
from mpc import mpc

gym_log.set_level(gym_log.INFO)
logger = logging.getLogger(__name__)
logging.basicConfig(
    level=logging.DEBUG,
    format='[%(levelname)s %(asctime)s %(pathname)s:%(lineno)d] %(message)s',
    datefmt='%m-%d %H:%M:%S')

if __name__ == "__main__":
    ENV_NAME = "Pendulum-v0"
    TIMESTEPS = 10  # T
    N_BATCH = 1
    LQR_ITER = 5
    ACTION_LOW = -2.0
    ACTION_HIGH = 2.0

    class PendulumDynamics(torch.nn.Module):
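The excerpt ends at the class definition. As a hypothetical sketch of what the dynamics module's forward pass could look like, assuming the standard Pendulum-v0 equations with a 2-D [theta, theta_dot] state (constants and state layout are assumptions, not the original code):

        def forward(self, state, action):
            # Split the state into angle and angular velocity.
            th, thdot = state[:, 0].view(-1, 1), state[:, 1].view(-1, 1)
            g, m, l, dt = 10.0, 1.0, 1.0, 0.05          # assumed constants
            u = torch.clamp(action, ACTION_LOW, ACTION_HIGH)
            newthdot = thdot + (-3 * g / (2 * l) * torch.sin(th + math.pi)
                                + 3.0 / (m * l ** 2) * u) * dt
            newthdot = torch.clamp(newthdot, -8.0, 8.0)
            newth = th + newthdot * dt
            return torch.cat((newth, newthdot), dim=1)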
Example #6
    with open(args['grid'], 'r') as grid_file:
        grid_config = yaml.safe_load(grid_file)

    # Overwrite config
    # NOTE: get() makes use of a default value in case key is not in grid_config
    config = {key: grid_config.get(key, config[key]) for key in config}

    # Build parameter grid
    params = list(ParameterGrid(config))

    # Start clock
    start = time.time()
    print(f'About to evaluate {len(params)} parameter sets')

    # Disable logger
    logger.set_level(logger.DISABLED)

    # Multiprocessing pool
    pool = mp.Pool(processes=mp.cpu_count())

    # Run
    final_scores = pool.map(eval_single, list(enumerate(params)))

    # Close
    pool.close()
    pool.join()

    # Finished!
    print(f'Execution time: {(time.time() - start) / 3600:.2f} hours')

    # Create recording directory if it doesn't exist
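The snippet is cut off right after this comment; a typical way to do what it describes (the directory name is illustrative, and `os` is assumed to be imported):

    os.makedirs('recordings', exist_ok=True)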
Example #7
                        type=int,
                        default=1000,
                        help='maximum number of episodes to run')
    parser.add_argument('--verbose',
                        action='store_true',
                        help='output verbose logging for steps')
    parser.add_argument('--random_action',
                        action='store_true',
                        help='Random policy for comparison')
    parser.add_argument('--gamma',
                        type=float,
                        default=1.0,
                        metavar='G',
                        help='discount factor (default: 1.0)')
    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.5,
                        help='learning rate (default: 0.5)')
    parser.add_argument('--seed', type=int, metavar='N', help='random seed')
    args = parser.parse_args()
    logger.set_level(logger.INFO)

    if args.verbose:
        logger.set_level(logger.DEBUG)

    # Set the random seed if defined
    if args.seed is not None:
        random.seed(args.seed)

    # Run the training
    main(args)
Example #8

def wrap_env(env, task_name, logger_level=logger.INFO):
    logger.set_level(logger_level)

    outdir = os.path.join(ROOT_DIR, 'logs/' + task_name + '-results')

    return wrappers.Monitor(env, directory=outdir, force=True)
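A possible way to use the helper above, assuming gym, wrappers, and logger are imported as in the other examples (environment id and task name are chosen purely for illustration):

env = wrap_env(gym.make('CartPole-v0'), 'cartpole', logger_level=logger.WARN)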
Example #9
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(40)
import tensorflow as tf
import time
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.layers import Reshape
from keras.layers import Conv2D
from keras.layers import Flatten
from keras.models import InputLayer
from keras.optimizers import Adam
import math
import glob
import io
import base64
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display


# set up and run the environment
def run():
    # set up display recording
    display = Display(visible=0, size=(1400, 900))
    display.start()
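The function is cut off after starting the virtual display. A minimal sketch of how such a setup often continues (the environment id and output directory are assumptions, not from the original code):

    env = Monitor(gym.make('CartPole-v0'), './video', force=True)
    observation = env.reset()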
Example #10
import argparse
import sys
import gym
from gym import wrappers, logger

class RandomAgent(object):
    def __init__(self, action_space):
        self.action_space = action_space

    def act(self, observation, reward, done):
        return self.action_space.sample()


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('env_id', nargs='?', default='CartPole-v0', help='Select the environment to run')
    args = parser.parse_args()
    logger.set_level(logger.INFO)

    env = gym.make(args.env_id)
    outdir = '/tmp/random-agent-results'
    env = wrappers.Monitor(env, directory=outdir, video_callable=False, force=True)
    env.seed(0)
    agent = RandomAgent(env.action_space)

    episode_count = 100
    reward = 0
    done = False

    for i in range(episode_count):
        ob = env.reset()
        while True:
            action = agent.act(ob, reward, done)
            ob, reward, done, _ = env.step(action)
            if done:
                break

    env.close()
Example #11
    zero_completed_obs = np.zeros((NUM_CPU,) + ob_shape)
    zero_completed_obs[0, :] = test_env.reset()
    state = None
    for _ in range(L):
      action, state = model.predict(zero_completed_obs, state=state, deterministic=True)
      zero_completed_obs[0, :], reward, done, _ = test_env.env_method('step', action[0], indices=0)[0]
    sharpe_ratios.append(test_env.env_method('get_sharpe_ratio', indices=0)[0])
    if plot: test_env.env_method('render', indices=0)
  test_env.close()
  
  # Return the average sharpe ratio
  return sum(sharpe_ratios) / len(sharpe_ratios)
  

if __name__ == '__main__':
  logger.set_level(logger.ERROR)

  parser = argparse.ArgumentParser()
  parser.add_argument('--env', type=str)
  parser.add_argument('--use_sigmoid_layer', action='store_true',
                      help='Whether or not to use SigmoidMlpPolicy. Drop this flag to use MlpPolicy.')
  parser.add_argument('--optimize', action='store_true',
                      help='Search for optimal hyperparameters. Drop this flag to run the actual training.')
  parser.add_argument('--num_trials', type=int, default=10,
                      help='Number of trials to search for optimal hyperparameters.')
  parser.add_argument('--evaluation_epochs', type=int, default=10,
                      help='The length that the model runs when evaluating hyperparameters.')
  parser.add_argument('--evaluate_model_per_epochs', type=int, default=10,
                      help='How often should we evaluate the model during training.')
  parser.add_argument('--max_train_epochs', type=int, default=1000,
                      help='Max number of epochs that the model runs during training.')
Example #12
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(30)

import numpy as np
import random
import math
import glob
import io
import os
import cv2
import base64
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
from collections import deque
from datetime import datetime

import argparse
import os
import numpy as np
from atari_wrappers import make_atari, wrap_deepmind, Monitor
from a2c import Agent
from neural_network import CNN
import imageio
import time
from numpy.random import seed
from tensorflow import set_random_seed
from setproctitle import setproctitle as ptitle
def run_bench():
    logger.set_level(logger.INFO)

    rewards = {x: [] for x in list(agents_list.keys())}
    regrets = {x: [] for x in list(agents_list.keys())}
    brs = {x: [] for x in list(agents_list.keys())}
    for i in range(nb_exp):
        print(f'exp {i}')
        for _ in range(nb_episodes):
            env.env.reset()
            for agent_name in list(agents_list.keys()):
                agent = agents_list[agent_name]
                #print(agent.reset())
                agent = agent.reset()
                ob = env.reset()
                step = 0
                reward = 0
                reward_record = [0]
                regret_record = [0]
                br_record = [0]
                done = False
                while True:
                    step += 1
                    action = agent.act(ob, reward, done)
                    ob, reward, done, _ = env.step(action)
                    # print(f'{agent_name} - action:{action} - reward:{reward}')
                    best_reward = env.env.get_best_reward()
                    if done:
                        break
                    reward_record.append(reward_record[step - 1] + reward)
                    regret_record.append(regret_record[step - 1] +
                                         best_reward - reward)
                    br_record.append(best_reward)
                rewards[agent_name].append(reward_record)
                regrets[agent_name].append(regret_record)
                brs[agent_name].append(br_record)
    env.env.close()

    plt.figure()
    for agent_name in rewards:
        x = np.mean(rewards[agent_name], axis=0)
        plt.plot(x, label=f'{agent_name}')

    plt.title('Cumulative Score')
    plt.legend()
    plt.savefig(env_name + '_Score')
    plt.show()

    f1 = plt.figure()
    for agent_name in regrets:
        x = np.mean(regrets[agent_name], axis=0)
        plt.plot(x, label=f'{agent_name}')
    plt.title('Cumulative Regret')
    plt.legend()
    plt.savefig(env_name + '_Regret')
    plt.show()

    plt.figure()
    plt.plot(np.mean(brs['Random Agent'], axis=0))
    plt.title('Best Reward')
    plt.savefig(env_name + '_Best_Reward')
    plt.show()
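run_bench relies on several module-level names that are not shown in this excerpt: env, env_name, agents_list, nb_exp, nb_episodes, and a gym logger import. A purely illustrative setup, with an assumed bandit-style environment id and agent class:

from gym import logger

env_name = 'BanditEnv-v0'                 # assumed environment id, not from the original
env = gym.make(env_name)
agents_list = {'Random Agent': RandomAgent(env.action_space)}   # RandomAgent is assumed
nb_exp, nb_episodes = 10, 100

run_bench()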