Example #1
def main(job_id, input_params):
    import sys
    import logging
    gym.undo_logger_setup()
    logger = logging.getLogger()
    logger.setLevel(logging.ERROR)

    datasets = ["CartPole-v0", "Acrobot-v0", "MountainCar-v0", "Pendulum-v0"]
    params = LeeaParams()
    params.parent_fitness_decay = input_params["parent_fitness_decay"][0]
    params.mutation_power_decay = 0.99
    params.sexual_reproduction_proportion = 0.5
    params.population_size = input_params["population_size"][0]
    params.starting_mutation_power = input_params["starting_mutation_power"][0]
    params.mutation_power = params.starting_mutation_power
    params.mutation_rate = input_params["mutation_rate"][0]
    params.selection_proportion = input_params["selection_proportion"][0]

    max_evaluations = 5000
    architecture = "simple"
    network_size = 20

    cum_reward = 0
    for env_name in datasets:
        step_limit = gym.envs.registry.spec(env_name).timestep_limit
        build_network = net_configuration(architecture, network_size, env_name)
        reward = train_network(env_name, step_limit, max_evaluations,
                               build_network, params)
        if env_name == "Pendulum-v0":
            reward /= 10
        print(reward, reward / 100)
        cum_reward += reward

    return -cum_reward
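
A hypothetical driver for the main(job_id, input_params) entry point above. The dict-of-lists argument format mirrors how Spearmint-style hyperparameter optimizers pass values; the concrete numbers below are illustrative assumptions, not values from the original project.

if __name__ == '__main__':
    # Illustrative hyperparameter values (assumptions for demonstration only)
    params = {
        "parent_fitness_decay": [0.95],
        "population_size": [50],
        "starting_mutation_power": [0.1],
        "mutation_rate": [0.05],
        "selection_proportion": [0.4],
    }
    print(main(0, params))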
Example #2
def build_env(env_id):
    gym.undo_logger_setup()
    env = gym.make(env_id)

    if env_id.endswith('NoFrameskip-v4'):
        env = wrap_deepmind(env)
    return env
Example #3
def setup(exp, single_threaded):
    import gym
    gym.undo_logger_setup()
    from . import policies, tf_util

    config = Config(**exp['config'])
    # from util.MultiStateEnv import MultiStateEnv
    #
    # env = MultiStateEnv(game='SonicTheHedgehog-Genesis',
    #                     states=['GreenHillZone.Act1', 'GreenHillZone.Act2', 'GreenHillZone.Act3',
    #                             'LabyrinthZone.Act1', 'LabyrinthZone.Act2', 'LabyrinthZone.Act3',
    #                             'MarbleZone.Act1', 'MarbleZone.Act2', 'MarbleZone.Act3',
    #                             'ScrapBrainZone.Act1', 'ScrapBrainZone.Act2',
    #                             'SpringYardZone.Act1', 'SpringYardZone.Act2', 'SpringYardZone.Act3',
    #                             'StarLightZone.Act1', 'StarLightZone.Act2', 'StarLightZone.Act3'])
    #
    # from gym.wrappers.time_limit import TimeLimit
    # env = TimeLimit(env,max_episode_steps=env.spec.max_episode_steps,
    #                     max_episode_seconds=env.spec.max_episode_seconds)

    env = gym.make(exp['env_id'])
    if exp['env_id'].endswith('NoFrameskip-v4'):
        from .atari_wrappers import wrap_deepmind
        env = wrap_deepmind(env)
    sess = make_session(single_threaded=single_threaded)
    policy = getattr(policies, exp['policy']['type'])(env.observation_space, env.action_space, **exp['policy']['args'])
    tf_util.initialize()
    return config, env, sess, policy
Example #4
def setup_env(exp):
    import gym
    gym.undo_logger_setup()
    config = Config(**exp['config'])
    env = gym.make(exp['env_id'])
    if exp['policy']['type'] == "ESAtariPolicy":
        from .atari_wrappers import wrap_deepmind
        env = wrap_deepmind(env)
    return config, env
Example #5
def testA2C():
    render = False
    filename = 'testA2C.h5'
    resume = False
    # resume = True
    # render = True

    gym.undo_logger_setup()  # Stop gym logging
    actionSpace = [2, 3]
    agent = A2C_OneGame(2, 1024, actionSpace, filename, resume=resume)
    game = Game('Pong-v0', agent, render=render, logfile='test.log')
    game.play()
Example #6
def test():
    render = False
    filename = 'test.h5'
    resume = False
    # filename = 'pong_gym_keras_mlp_full_batch.h5'
    # resume = True
    # render = True

    gym.undo_logger_setup()  # Stop gym logging
    agent = KarpathyPolicyPong(filename, resume=resume)
    game = Game('Pong-v0', agent, render=render, logfile='test.log')
    game.play()
Example #7
def test():
    gym.undo_logger_setup()

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)

    model_dump_filepath = 'pretrained_models/ddpg_{}_weights.h5f'.format(ENV_NAME)

    agent = build_agent(env)
    agent.load_weights(model_dump_filepath)
    agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=200)
Example #8
def setup(exp, single_threaded):
    import gym
    gym.undo_logger_setup()
    from . import policies, tf_util

    config = Config(**exp['config'])
    env = gym.make(exp['env_id'])
    if exp['env_id'].endswith('NoFrameskip-v4'):
        from .atari_wrappers import wrap_deepmind
        env = wrap_deepmind(env)
    sess = make_session(single_threaded=single_threaded)
    policy = getattr(policies, exp['policy']['type'])(env.observation_space, env.action_space, **exp['policy']['args'])
    tf_util.initialize()
    return config, env, sess, policy
Example #9
def load_network(env):
    ENV_NAME = 'Carom-v0'
    gym.undo_logger_setup()

    # Get the environment and extract the number of actions.
    np.random.seed(323)
    env.seed(323)
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    # Next, we build a very simple model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(1, ) + env.observation_space.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=50000, window_length=1)
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=100,
                      nb_steps_warmup_actor=100,
                      gamma=.99,
                      target_model_update=1e3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    agent.load_weights('ddpg_{}_2balls_final_weights_v4.h5f'.format(ENV_NAME))
    return agent
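
A minimal usage sketch for load_network above, assuming the custom Carom-v0 environment is registered with gym, keras-rl's DDPGAgent.test interface is available, and the pretrained weight file referenced inside load_network exists; the episode count is illustrative.

# Hypothetical usage of the pretrained DDPG agent
env = gym.make('Carom-v0')
agent = load_network(env)
agent.test(env, nb_episodes=5, visualize=False)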
Example #10
def setup(exp, single_threaded):
    import gym
    gym.undo_logger_setup()
    from . import policies, tf_util

    config = Config(**exp['config'])
    env = gym.make(exp['env_id'])
    sess = make_session(single_threaded=single_threaded)
    policy = getattr(policies, exp['policy']['type'])(env.observation_space,
                                                      env.action_space,
                                                      **exp['policy']['args'])
    tf_util.initialize()

    return config, env, sess, policy
Example #11
def make_multi_pong(frame_stack=True):
    import gym
    import retro
    from baselines.common.atari_wrappers import FrameStack
    gym.undo_logger_setup()
    game_env = env = retro.make('Pong-Atari2600', players=2)
    env = RetroALEActions(env, game_env.BUTTONS, n_players=2)
    env = NoReward(env)
    env = FrameSkip(env, 4)
    env = ProcessFrame84(env, crop=False)
    if frame_stack:
        env = FrameStack(env, 4)

    return env
Example #12
def make_multi_pong(frame_stack=True):
    import gym
    import retro

    gym.undo_logger_setup()
    game_env = env = retro.make("Pong-Atari2600", players=2)
    env = RetroALEActions(env, game_env.BUTTONS, n_players=2)
    env = NoReward(env)
    env = FrameSkip(env, 4)
    env = ProcessFrame84(env, crop=False)
    if frame_stack:
        env = FrameStack(env, 4)

    return env
Example #13
def setup_env(exp):
    import gym
    gym.undo_logger_setup()
    config = Config(**exp['config'])
    if exp['env_id'] == "DeceptivePointEnv-v0":
        # Lil hack we need to do
        import sys
        sys.path.append("../envs")
        import point_env
    env = gym.make(exp['env_id'])
    if exp['policy']['type'] == "ESAtariPolicy":
        from .atari_wrappers import wrap_deepmind
        env = wrap_deepmind(env)
    return config, env
Example #14
    def _parallel_worker(parent, env, seed, connection):
        gym.undo_logger_setup()
        env = gym.make(env)
        env.seed(seed)

        connection.send(parent._maybe_discretize(env.reset()))
        try:
            while True:
                action = connection.recv()
                state, reward, done, info = env.step(action)
                if done: state = env.reset()
                connection.send((parent._maybe_discretize(state), reward, done, info))
        except KeyboardInterrupt:
            pass
Example #15
    def main(self):
        gym.undo_logger_setup()

        self.stats.start()
        self.dynamic_adjustment.start()

        if Config.PLAY_MODE:
            for trainer in self.trainers:
                trainer.enabled = False

        learning_rate_multiplier = (Config.LEARNING_RATE_END - Config.LEARNING_RATE_START) / Config.ANNEALING_EPISODE_COUNT
        beta_multiplier = (Config.BETA_END - Config.BETA_START) / Config.ANNEALING_EPISODE_COUNT

        while self.stats.episode_count.value < Config.EPISODES:
            step = min(self.stats.episode_count.value, Config.ANNEALING_EPISODE_COUNT - 1)
            self.model.learning_rate = Config.LEARNING_RATE_START + learning_rate_multiplier * step
            self.model.beta = Config.BETA_START + beta_multiplier * step

            # Saving is async - even if we start saving at a given episode, we may save the model at a later episode
            if Config.SAVE_MODELS and self.stats.should_save_model.value > 0:
                print("Saving GA3C model!")
                self.save_model()
                self.stats.should_save_model.value = 0

            if self.reward_modifier:
                ################################
                #  START REWARD MODIFICATIONS  #
                ################################
                if not self.reward_modifier_q.empty():
                    source_id, done, path = self.reward_modifier_q.get()
                    rewards = self.reward_modifier.predict_reward(path)

                    if done:
                        self.reward_modifier.path_callback(path)

                    self.agents[source_id].wait_q.put(rewards)
                ################################
                #   END REWARD MODIFICATIONS   #
                ################################

            time.sleep(0.01)

        self.dynamic_adjustment.exit_flag = True
        while self.agents:
            self.remove_agent()
        while self.predictors:
            self.remove_predictor()
        while self.trainers:
            self.remove_trainer()
Example #16
    def env(self):
        if self._env is None:
            import gym
            gym.undo_logger_setup()  # Get rid of gym logging

            # logger = logging.getLogger()
            # logger.addHandler(logging.StreamHandler(sys.stdout))

            self.pre_init_hook()
            self._env = self._create_gym_env(self.OPEN_AI_GYM_ENV_NAME)
            if self.runner.run_mode == 'run' and not self.runner.render:
                # We only run rendering to video in "run" mode (not training mode)
                self._env = monitor.UniMonitor(self._env)

        return self._env
Example #17
File: config.py Project: Frawak/squig-rl
def getEnv(visualize=False):
    if ENV_TAG == 'OSIM':
        e = env(difficulty=0, visualize=visualize, rewardMode=15, rewardScale=1.,
                action_repetition=3, pel_min=0.6)
    elif ENV_TAG == 'GYM' or ENV_TAG == 'ROBOSCHOOL':
        gym.undo_logger_setup()
        e = gym.make('RoboschoolInvertedPendulum-v1')
        #e.seed(0)
        if visualize:
            e = gv(e)
    else:
        e = None
        raise RuntimeError('No valid environment selected!')
    
    return e
Example #18
def test_smoke(env_id):
    """Check that environments start up without errors and that we can extract rewards and observations"""
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    env = gym.make(env_id)
    env = wrappers.Unvectorize(env)

    if os.environ.get('FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES'):  # Used to test universe-envs in CI
        configure_with_latest_docker_runtime_tag(env)
    else:
        env.configure(remotes=1)

    env.reset()
    _rollout(env, timestep_limit=60*30) # Check a rollout
Example #19
def setup(exp, single_threaded):
    import gym
    gym.undo_logger_setup()
    from . import policies, tf_util

    config = Config(**exp['config'])
    # env = gym.make(exp['env_id'])
    # if exp['env_id'].endswith('NoFrameskip-v4'):
    from .atari_wrappers import wrap_deepmind
    game_states = pd.read_csv("train_small.csv").values.tolist()
    env = wrap_deepmind('SonicTheHedgehog-Genesis', 'LabyrinthZone.Act1', game_states=game_states)
    sess = make_session(single_threaded=single_threaded)
    policy = getattr(policies, exp['policy']['type'])(env.observation_space, env.action_space, **exp['policy']['args'])
    tf_util.initialize()
    return config, env, sess, policy
Example #20
def make_mario_env(crop=True, frame_stack=True, clip_rewards=False):
    assert clip_rewards is False
    import gym
    import retro
    from baselines.common.atari_wrappers import FrameStack

    gym.undo_logger_setup()
    env = retro.make('SuperMarioBros-Nes', 'Level1-1')
    buttons = env.BUTTONS
    env = MarioXReward(env)
    env = FrameSkip(env, 4)
    env = ProcessFrame84(env, crop=crop)
    if frame_stack:
        env = FrameStack(env, 4)
    env = LimitedDiscreteActions(env, buttons)
    return env
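
A minimal usage sketch for make_mario_env above, assuming gym-retro has the SuperMarioBros-Nes ROM imported and that the wrapped environment follows gym's classic 4-tuple step API.

# Hypothetical smoke test of the wrapped environment
env = make_mario_env(crop=True, frame_stack=True)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()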
Example #21
def test_smoke(env_id):
    """Check that environments start up without errors and that we can extract rewards and observations"""
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    env = gym.make(env_id)
    env = wrappers.Unvectorize(env)

    if os.environ.get('FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES'):  # Used to test universe-envs in CI
        configure_with_latest_docker_runtime_tag(env)
    else:
        env.configure(remotes=1)

    env.reset()
    _rollout(env, timestep_limit=60 * 30)  # Check a rollout
Example #22
def train_cartpole_nnet():
    #from test import CartPoleContEnv

    ENV_NAME = 'CartPole-v0'
    gym.undo_logger_setup()

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)

    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=60000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=100,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=60000, visualize=False, verbose=2)

    # get model weights
    weights = model.get_weights()

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
    return weights
Example #23
def train_module(env, agent):
    """
    Training with chainerrl's experiment module.
    """
    import logging
    import sys
    import gym
    gym.undo_logger_setup()  # Turn off gym's default logger settings
    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format='')

    chainerrl.experiments.train_agent_with_evaluation(
        agent, env,
        steps=10000,           # Run the agent for a total of 10000 steps
        eval_n_runs=5,         # Run 5 evaluation episodes at each evaluation
        max_episode_len=200,   # Maximum number of steps per episode
        eval_frequency=1000,   # Run an evaluation every 1000 steps
        outdir='agent/result') # Save everything to 'agent/result' directory
Example #24
def get_new_env(env_name, cmdl):
    """Configure the training environment and return an instance."""
    import logging
    import gym
    import gym_fast_envs  # noqa
    from gym.wrappers import Monitor

    # Undo the default logger and configure a new one.
    gym.undo_logger_setup()
    logger = logging.getLogger()
    logger.setLevel(logging.WARNING)

    # Configure environment
    outdir = '/tmp/nec/%s-results' % cmdl.label
    env = gym.make(env_name)
    env = Monitor(env, directory=outdir, force=True, video_callable=False)
    env.seed(cmdl.seed)
    return env
Example #25
def setup(exp, single_threaded):
    import gym
    gym.undo_logger_setup()
    from . import policies, tf_util

    config = Config(**exp['config'])
    if exp['env_id'] == "DeceptivePointEnv-v0":
        # Lil hack we need to do
        import sys
        sys.path.append("../envs")
        import point_env
    env = gym.make(exp['env_id'])
    if exp['policy']['type'] == "ESAtariPolicy":
        from .atari_wrappers import wrap_deepmind
        env = wrap_deepmind(env)
    sess = make_session(single_threaded=single_threaded)
    policy = getattr(policies, exp['policy']['type'])(env.observation_space, env.action_space, **exp['policy']['args'])
    tf_util.initialize()
    return config, env, sess, policy
Example #26
def setup_env(exp):
    import gym
    gym.undo_logger_setup()
    config = Config(**exp['config'])

    if not isinstance(exp['env_id'], (list, )):
        env = gym.make(exp['env_id'])
        if exp['policy']['type'] == "ESAtariPolicy":
            from .atari_wrappers import wrap_deepmind
            env = wrap_deepmind(env)
        return config, [env]

    envs = []
    for env_id in exp['env_id']:
        env = gym.make(env_id)
        if exp['policy']['type'] == "ESAtariPolicy":
            from .atari_wrappers import wrap_deepmind
            env = wrap_deepmind(env)
        envs.append(env)
    return config, envs
Example #27
def setup(exp, single_threaded, snapshot_file=None):
    import gym
    gym.undo_logger_setup()
    from . import policies, tf_util

    config = Config(**exp['config'])
    env = gym.make(exp['env_id'])
    session = make_session(single_threaded=single_threaded)
    if snapshot_file is None:
        policy = getattr(policies,
                         exp['policy']['type'])(env.observation_space,
                                                env.action_space,
                                                **exp['policy']['args'])
        running_from_snapshot = False
    else:
        print("[master] Initializing agent weights using a snapshot...")
        policy = getattr(policies, exp['policy']['type']).Load(snapshot_file)
        running_from_snapshot = True
    tf_util.initialize()
    return config, env, session, policy, running_from_snapshot
Example #28
    def __init__(self, log):
        """Initialize default configuration."""
        # some libraries think it is a good idea to add handlers by default
        # without documenting that at all, thanks gpy...
        log.propagate = False

        self.log = log
        self.n_jobs = 1
        self.monitor_verbosity = 0

        self._stream_handler = None
        self._file_handler = None
        self._fmt = ('%(process)d - %(asctime)s - %(name)s - %(levelname)s' +
                     ' - %(message)s')
        self._formatter = logging.Formatter(self._fmt)

        try:
            import gym
            gym.undo_logger_setup()
        except Exception:
            pass
Example #29
def test_nice_vnc_semantics_match(spec, matcher, wrapper):
    # Check that when running over VNC or using the raw environment,
    # semantics match exactly.
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    spaces.seed(0)

    vnc_env = spec.make()
    if vnc_env.metadata.get('configure.required', False):
        vnc_env.configure(remotes=1)
    vnc_env = wrapper(vnc_env)
    vnc_env = wrappers.Unvectorize(vnc_env)

    env = gym.make(spec._kwargs['gym_core_id'])

    env.seed(0)
    vnc_env.seed(0)

    # Check that reset observations work
    reset(matcher, env, vnc_env, stage='initial reset')

    # Check a full rollout
    rollout(matcher, env, vnc_env, timestep_limit=50, stage='50 steps')

    # Reset to start a new episode
    reset(matcher, env, vnc_env, stage='reset to new episode')

    # Check that a step into the next episode works
    rollout(matcher,
            env,
            vnc_env,
            timestep_limit=1,
            stage='1 step in new episode')

    # Make sure env can be reseeded
    env.seed(1)
    vnc_env.seed(1)
    reset(matcher, env, vnc_env, 'reseeded reset')
    rollout(matcher, env, vnc_env, timestep_limit=1, stage='reseeded step')
Example #30
def env_factory(cmdl, mode):
    # Undo the default logger and configure a new one.
    gym.undo_logger_setup()
    logger = logging.getLogger()
    logger.setLevel(logging.WARNING)

    print(clr("[Main] Constructing %s environment." % mode, attrs=['bold']))
    env = gym.make(cmdl.env_name)

    if hasattr(cmdl, 'rescale_dims'):
        state_dims = (cmdl.rescale_dims, cmdl.rescale_dims)
    else:
        state_dims = env.observation_space.shape[0:2]

    env_class, hist_len, cuda = cmdl.env_class, cmdl.hist_len, cmdl.cuda

    if mode == "training":
        env = PreprocessFrames(env, env_class, hist_len, state_dims, cuda)
        if hasattr(cmdl, 'reward_clamp') and cmdl.reward_clamp:
            env = SqueezeRewards(env)
        if hasattr(cmdl, 'done_after_lost_life') and cmdl.done_after_lost_life:
            env = DoneAfterLostLife(env)
        print('-' * 50)
        return env

    elif mode == "evaluation":
        if cmdl.eval_env_name != cmdl.env_name:
            print(
                clr("[%s] Warning! evaluating on a different env: %s" %
                    ("Main", cmdl.eval_env_name),
                    'red',
                    attrs=['bold']))
            env = gym.make(cmdl.eval_env_name)

        env = PreprocessFrames(env, env_class, hist_len, state_dims, cuda)
        env = EvaluationMonitor(env, cmdl)
        print('-' * 50)
        return env
Example #31
def setup(exp, single_threaded):
    import gym
    gym.undo_logger_setup()
    from . import policies, tf_util

    config = Config(**exp['config'])
    if 'env_id' in exp:
        env = gym.make(exp['env_id'])
    elif 'env_target' in exp:
        env_target = exp['env_target']
        (module_str, cls_str) = env_target.split(":")
        module = importlib.import_module(module_str)
        cls = getattr(module, cls_str)
        env = cls(**exp['env_params'])
    else:
        raise NotImplementedError
    sess = make_session(single_threaded=single_threaded)
    policy = getattr(policies, exp['policy']['type'])(env.observation_space,
                                                      env.action_space,
                                                      **exp['policy']['args'])
    tf_util.initialize()

    return config, env, sess, policy
Example #32
def test_nice_vnc_semantics_match(spec, matcher, wrapper):
    # Check that when running over VNC or using the raw environment,
    # semantics match exactly.
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)

    spaces.seed(0)

    vnc_env = spec.make()
    if vnc_env.metadata.get('configure.required', False):
        vnc_env.configure(remotes=1)
    vnc_env = wrapper(vnc_env)
    vnc_env = wrappers.Unvectorize(vnc_env)

    env = gym.make(spec._kwargs['gym_core_id'])

    env.seed(0)
    vnc_env.seed(0)

    # Check that reset observations work
    reset(matcher, env, vnc_env, stage='initial reset')

    # Check a full rollout
    rollout(matcher, env, vnc_env, timestep_limit=50, stage='50 steps')

    # Reset to start a new episode
    reset(matcher, env, vnc_env, stage='reset to new episode')

    # Check that a step into the next episode works
    rollout(matcher, env, vnc_env, timestep_limit=1, stage='1 step in new episode')

    # Make sure env can be reseeded
    env.seed(1)
    vnc_env.seed(1)
    reset(matcher, env, vnc_env, 'reseeded reset')
    rollout(matcher, env, vnc_env, timestep_limit=1, stage='reseeded step')
Example #33
from copy import deepcopy
import torch
import gym
from normalized_env import *
# from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv
# from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
from evaluator import Evaluator
from ddpg import DDPG
from util import *
from tensorboardX import SummaryWriter
from observation_processor import queue
from multi import fastenv

# from llll import Subprocess

gym.undo_logger_setup()

import time

writer = SummaryWriter()

def train(num_iterations, agent, env, evaluate, bullet):
    fenv = fastenv(env, args.action_repeat, args.vis, args.atari)
    window_length = args.window_length
    validate_interval = args.validate_interval
    save_interval = args.save_interval
    max_episode_length = args.max_episode_length // args.action_repeat
    debug = args.debug
    visualize = args.vis
    traintimes = args.traintimes
    output = args.output