def testCachingEnvironmentVariables(self):
    """Environment variables may be defined before ray.init connects the driver."""
    def make_foo():
        # Immutable value; no reinitializer is supplied for it.
        return 1

    def make_bar():
        return []

    def reset_bar(bar):
        # Discard whatever a task appended and hand back a fresh list.
        return []

    # Register both variables while still disconnected.
    ray.env.foo = ray.EnvironmentVariable(make_foo)
    ray.env.bar = ray.EnvironmentVariable(make_bar, reset_bar)

    @ray.remote
    def use_foo():
        return ray.env.foo

    @ray.remote
    def use_bar():
        ray.env.bar.append(1)
        return ray.env.bar

    # Connect only after the variables were registered.
    ray.init(num_workers=2)
    for _ in range(2):
        self.assertEqual(ray.get(use_foo.remote()), 1)
    for _ in range(2):
        # The reinitializer resets bar, so every task sees exactly [1].
        self.assertEqual(ray.get(use_bar.remote()), [1])
    ray.worker.cleanup()
def testNetworkDriverWorkerIndependent(self):
    """A network built directly on the driver does not interfere with one
    held in an environment variable."""
    ray.init(num_workers=1)

    # Create a network on the driver locally.
    local_sess = tf.Session()
    local_loss, local_init = make_linear_network()
    local_vars = ray.experimental.TensorFlowVariables(local_loss, local_sess)
    local_sess.run(local_init)

    # Create a second network on the driver via an environment variable.
    ray.env.net = ray.EnvironmentVariable(net_vars_initializer,
                                          net_vars_reinitializer)
    env_vars, env_init, env_sess = ray.env.net
    env_sess.run(env_init)
    weights_before = env_vars.get_weights()

    @ray.remote
    def set_and_get_weights(weights):
        ray.env.net[0].set_weights(weights)
        return ray.env.net[0].get_weights()

    # Round-tripping the weights through a worker must leave them unchanged.
    weights_after = ray.get(
        set_and_get_weights.remote(env_vars.get_weights()))
    self.assertEqual(weights_before, weights_after)
    ray.worker.cleanup()
def testNetworksIndependent(self):
    """Two environment-variable networks on the same worker stay independent."""
    # A single worker guarantees all remote calls below run in one process.
    ray.init(num_workers=1)
    ray.env.net1 = ray.EnvironmentVariable(net_vars_initializer,
                                           net_vars_reinitializer)
    ray.env.net2 = ray.EnvironmentVariable(net_vars_initializer,
                                           net_vars_reinitializer)
    vars1, init1, sess1 = ray.env.net1
    vars2, init2, sess2 = ray.env.net2
    # Initialize both networks.
    sess1.run(init1)
    sess2.run(init2)

    @ray.remote
    def set_and_get_weights(weights1, weights2):
        ray.env.net1[0].set_weights(weights1)
        ray.env.net2[0].set_weights(weights2)
        return ray.env.net1[0].get_weights(), ray.env.net2[0].get_weights()

    # The two networks should start with different weights. TODO(rkn): Note
    # that equality comparison of numpy arrays normally does not behave this
    # way; it only works here because the arrays have size 1.
    weights1 = vars1.get_weights()
    weights2 = vars2.get_weights()
    self.assertNotEqual(weights1, weights2)

    # Set then read each network's weights; they must come back unchanged.
    roundtrip1, roundtrip2 = ray.get(
        set_and_get_weights.remote(weights1, weights2))
    self.assertEqual(weights1, roundtrip1)
    self.assertEqual(weights2, roundtrip2)

    # Swap the weights between the networks and check again.
    roundtrip2, roundtrip1 = ray.get(
        set_and_get_weights.remote(weights2, weights1))
    self.assertEqual(weights1, roundtrip1)
    self.assertEqual(weights2, roundtrip2)
    ray.worker.cleanup()
def testVariableNameCollision(self):
    """Networks built from the same initializer can exchange weight dicts,
    i.e. their TensorFlow variable names coincide."""
    ray.init(num_workers=2)
    ray.env.net1 = ray.EnvironmentVariable(net_vars_initializer,
                                           net_vars_reinitializer)
    ray.env.net2 = ray.EnvironmentVariable(net_vars_initializer,
                                           net_vars_reinitializer)
    vars1, init1, sess1 = ray.env.net1
    vars2, init2, sess2 = ray.env.net2
    # Initialize both networks.
    sess1.run(init1)
    sess2.run(init2)
    # Copying weights across networks only succeeds when the keys of the two
    # weight dictionaries (the variable names) match, which is what this
    # call verifies.
    ray.env.net1[0].set_weights(ray.env.net2[0].get_weights())
    ray.worker.cleanup()
def testFailImportingEnvironmentVariable(self):
    """An initializer that raises on workers surfaces as a registration error."""
    ray.init(num_workers=2, driver_mode=ray.SILENT_MODE)

    def failing_initializer():
        # Succeed on the driver, but raise whenever a worker imports the
        # environment variable.
        if ray.worker.global_worker.mode == ray.WORKER_MODE:
            raise Exception("The initializer failed.")
        return 0

    ray.env.foo = ray.EnvironmentVariable(failing_initializer)
    # Expect one registration error per worker.
    wait_for_errors(b"register_environment_variable", 2)
    # The initializer's message must appear in the recorded error info.
    self.assertIn(b"The initializer failed.",
                  ray.error_info()[0][b"message"])
    ray.worker.cleanup()
def testRemoteTrainingStep(self):
    """A single gradient computation can be executed as a remote task."""
    ray.init(num_workers=1)
    ray.env.net = ray.EnvironmentVariable(train_vars_initializer,
                                          net_vars_reinitializer)

    @ray.remote
    def training_step(weights):
        # Load the given weights, then evaluate the gradients on a fixed batch.
        _, variables, _, sess, grads, _, placeholders = ray.env.net
        variables.set_weights(weights)
        feed = dict(zip(placeholders, [[1] * 100] * 2))
        return sess.run([g[0] for g in grads], feed_dict=feed)

    _, variables, init, sess, _, _, _ = ray.env.net
    sess.run(init)
    # Only verifying that the remote step runs to completion.
    ray.get(training_step.remote(variables.get_weights()))
    ray.worker.cleanup()
def testRemoteTrainingLoss(self):
    # Runs several rounds of synchronous data-parallel training: each worker
    # computes gradients remotely, the driver averages them and applies one
    # optimizer step, and the objective is compared before vs. after.
    ray.init(num_workers=2)
    # ray.env.net unpacks to (loss, variables, init, sess, grads, train,
    # placeholders) -- produced by train_vars_initializer.
    ray.env.net = ray.EnvironmentVariable(train_vars_initializer,
                                          net_vars_reinitializer)

    @ray.remote
    def training_step(weights):
        # Load the given weights and compute gradients for a fixed batch.
        _, variables, _, sess, grads, _, placeholders = ray.env.net
        variables.set_weights(weights)
        return sess.run([grad[0] for grad in grads],
                        feed_dict=dict(
                            zip(placeholders, [[1] * 100, [2] * 100])))

    loss, variables, init, sess, grads, train, placeholders = ray.env.net
    sess.run(init)
    # Objective value before any training, on a fixed evaluation batch.
    before_acc = sess.run(loss, feed_dict=dict(
        zip(placeholders, [[2] * 100, [4] * 100])))
    for _ in range(3):
        # Fan out one gradient computation per worker.
        gradients_list = ray.get([
            training_step.remote(variables.get_weights()) for _ in range(2)
        ])
        # Average the gradients across workers, component-wise.
        mean_grads = [
            sum([gradients[i] for gradients in gradients_list]) /
            len(gradients_list) for i in range(len(gradients_list[0]))
        ]
        # Feed the averaged gradients back into the training op.
        feed_dict = {
            grad[0]: mean_grad
            for (grad, mean_grad) in zip(grads, mean_grads)
        }
        sess.run(train, feed_dict=feed_dict)
    after_acc = sess.run(loss, feed_dict=dict(
        zip(placeholders, [[2] * 100, [4] * 100])))
    # NOTE(review): the assertion expects the metric to INCREASE with
    # training; the `before_acc`/`after_acc` names suggest an accuracy-style
    # objective even though the tensor is called `loss` -- confirm against
    # train_vars_initializer before changing anything here.
    self.assertTrue(before_acc < after_acc)
    ray.worker.cleanup()
def testUsingEnvironmentVariablesOnDriver(self):
    """The driver keeps its own stateful copy of an environment variable."""
    ray.init(num_workers=1)

    # Register a list-valued variable whose reinitializer discards mutations.
    def make_list():
        return []

    def reset_list(foo):
        return []

    ray.env.foo = ray.EnvironmentVariable(make_list, reset_list)

    @ray.remote
    def use_foo():
        foo = ray.env.foo
        foo.append(1)
        return foo

    # Running remote functions must not reset the environment variable on
    # the driver; the driver copy is stateful.
    foo = ray.env.foo
    self.assertEqual(foo, [])
    foo.append(2)
    self.assertEqual(foo, [2])
    foo.append(3)
    self.assertEqual(foo, [2, 3])

    # Each worker invocation starts from a freshly reinitialized list.
    for _ in range(3):
        self.assertEqual(ray.get(use_foo.remote()), [1])

    # The driver's copy is untouched, even when fetched again.
    self.assertEqual(foo, [2, 3])
    foo = ray.env.foo
    self.assertEqual(foo, [2, 3])
    ray.worker.cleanup()
def testFailReinitializingVariable(self):
    """A reinitializer that raises is reported as an error, not a crash."""
    ray.init(num_workers=2, driver_mode=ray.SILENT_MODE)

    def make_zero():
        return 0

    def failing_reinit(foo):
        # Raised after the first task uses the variable.
        raise Exception("The reinitializer failed.")

    ray.env.foo = ray.EnvironmentVariable(make_zero, failing_reinit)

    @ray.remote
    def use_foo():
        # Touch the variable so reinitialization is triggered afterwards.
        ray.env.foo

    use_foo.remote()
    wait_for_errors(b"reinitialize_environment_variable", 1)
    # The reinitializer's message must appear in the recorded error info.
    self.assertIn(b"The reinitializer failed.",
                  ray.error_info()[0][b"message"])
    ray.worker.cleanup()
def testEnvironmentVariablesInPythonMode(self):
    """PYTHON_MODE runs tasks locally yet keeps the driver copy stateful."""
    reload(test_functions)
    ray.init(driver_mode=ray.PYTHON_MODE)

    def make_list():
        return []

    def reset_list(l):
        return []

    ray.env.l = ray.EnvironmentVariable(make_list, reset_list)

    @ray.remote
    def use_l():
        l = ray.env.l
        l.append(1)
        return l

    # The local copy of the environment variable should be stateful.
    l = ray.env.l
    assert_equal(l, [])

    # Every call to the remote function sees a freshly reset list.
    assert_equal(ray.get(use_l.remote()), [1])
    assert_equal(ray.get(use_l.remote()), [1])

    # The local copy was not mutated by the remote calls, even when fetched
    # again through ray.env.
    assert_equal(l, [])
    l = ray.env.l
    assert_equal(l, [])

    # Running a remote function does not reset local state either.
    l.append(2)
    assert_equal(ray.get(use_l.remote()), [1])
    assert_equal(l, [2])
    ray.worker.cleanup()
# Function for initializing the gym environment.
def env_initializer():
    return gym.make("Pong-v0")


# Function for reinitializing the gym environment in order to guarantee that
# the state of the game is reset after each remote task.
def env_reinitializer(env):
    env.reset()
    return env


# Create an environment variable for the gym environment.
ray.env.env = ray.EnvironmentVariable(env_initializer, env_reinitializer)


def sigmoid(x):
    # Sigmoid "squashing" function mapping the reals to the interval [0, 1].
    return 1.0 / (1.0 + np.exp(-x))


def preprocess(I):
    """Preprocess a 210x160x3 uint8 frame into a 6400 (80x80) 1D float vector."""
    I = I[35:195]  # Crop to the playing field.
    I = I[::2, ::2, 0]  # Downsample by a factor of 2; keep one color channel.
    I[I == 144] = 0  # Erase background (background type 1).
    I[I == 109] = 0  # Erase background (background type 2).
    I[I != 0] = 1  # Everything else (paddles, ball) is set to 1.
    # Use the builtin `float` (i.e. float64): the `np.float` alias was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24, so the original
    # `I.astype(np.float)` raises AttributeError on current NumPy.
    return I.astype(float).ravel()
def testEnvironmentVariables(self):
    # Exercises the full EnvironmentVariable API: plain values, mutation with
    # implicit reset, explicit reinitializers, and reinitializer invocation
    # counting.
    ray.init(num_workers=1)

    # Test that we can add a variable to the key-value store.
    def foo_initializer():
        return 1

    def foo_reinitializer(foo):
        # Identity reinitializer: the value is immutable, nothing to reset.
        return foo

    ray.env.foo = ray.EnvironmentVariable(foo_initializer, foo_reinitializer)
    self.assertEqual(ray.env.foo, 1)

    @ray.remote
    def use_foo():
        return ray.env.foo

    self.assertEqual(ray.get(use_foo.remote()), 1)
    self.assertEqual(ray.get(use_foo.remote()), 1)
    self.assertEqual(ray.get(use_foo.remote()), 1)

    # Test that we can add a variable to the key-value store, mutate it, and
    # reset it. No reinitializer is given here; the assertions below show the
    # value is restored between tasks (each call observes [1, 2, 3] again
    # before appending).
    def bar_initializer():
        return [1, 2, 3]

    ray.env.bar = ray.EnvironmentVariable(bar_initializer)

    @ray.remote
    def use_bar():
        ray.env.bar.append(4)
        return ray.env.bar

    self.assertEqual(ray.get(use_bar.remote()), [1, 2, 3, 4])
    self.assertEqual(ray.get(use_bar.remote()), [1, 2, 3, 4])
    self.assertEqual(ray.get(use_bar.remote()), [1, 2, 3, 4])

    # Test that we can use the reinitializer.
    def baz_initializer():
        return np.zeros([4])

    def baz_reinitializer(baz):
        # Zero the array in place after every task.
        for i in range(len(baz)):
            baz[i] = 0
        return baz

    ray.env.baz = ray.EnvironmentVariable(baz_initializer, baz_reinitializer)

    @ray.remote
    def use_baz(i):
        baz = ray.env.baz
        baz[i] = 1
        return baz

    # Each task sets a different entry; a stale 1 from an earlier task would
    # prove the reinitializer did not run.
    assert_equal(ray.get(use_baz.remote(0)), np.array([1, 0, 0, 0]))
    assert_equal(ray.get(use_baz.remote(1)), np.array([0, 1, 0, 0]))
    assert_equal(ray.get(use_baz.remote(2)), np.array([0, 0, 1, 0]))
    assert_equal(ray.get(use_baz.remote(3)), np.array([0, 0, 0, 1]))

    # Make sure the reinitializer is actually getting called. Note that this
    # is not the correct usage of a reinitializer because it does not reset
    # qux to its original state. This is just for testing.
    def qux_initializer():
        return 0

    def qux_reinitializer(x):
        # Counts reinitializations instead of resetting -- see note above.
        return x + 1

    ray.env.qux = ray.EnvironmentVariable(qux_initializer, qux_reinitializer)

    @ray.remote
    def use_qux():
        return ray.env.qux

    # The observed values 0, 1, 2 show one reinitialization per prior task.
    self.assertEqual(ray.get(use_qux.remote()), 0)
    self.assertEqual(ray.get(use_qux.remote()), 1)
    self.assertEqual(ray.get(use_qux.remote()), 2)
    ray.worker.cleanup()
def model_init():
    """Build the TF graph, session, and variable wrapper for one worker.

    Returns a dict with keys 'sess', 'variables', and 'name_to_ops'.
    """
    with tf.Graph().as_default():
        name_to_ops = gen_graph_ops(hyper_params)
        sess = tf.Session()
        variables = ray.experimental.TensorFlowVariables(
            name_to_ops['AddN'], sess)
        # Returning inside the `with` keeps the ops bound to this graph.
        return {'sess': sess, 'variables': variables,
                'name_to_ops': name_to_ops}


# Lazy constructors for the per-worker state (built on first use).
init_dict = dict(
    buffer=lambda: utils.Buffer(
        utils.EnvWrapper(hyper_params['env']), hyper_params['buffer_size']),
    env=lambda: utils.EnvWrapper(hyper_params['env']),
    model=model_init)

ray.init(num_workers=hyper_params['num_workers'])
# toolz.identity as the reinitializer: the state is deliberately carried
# across tasks rather than rebuilt after each one.
ray.env.env = ray.EnvironmentVariable(init_dict['env'], toolz.identity)
ray.env.buffer = ray.EnvironmentVariable(init_dict['buffer'], toolz.identity)
ray.env.model_dict = ray.EnvironmentVariable(init_dict['model'],
                                             toolz.identity)


@ray.remote
def step(params):
    # Pull the per-worker environment and model pieces out of ray.env.
    env = ray.env.env
    sess = ray.env.model_dict['sess']
    name_to_ops = ray.env.model_dict['name_to_ops']
    variables = ray.env.model_dict['variables']

    def step():
        transition_maps = []
        variables.set_weights(params['weights'])
        # NOTE(review): this chunk appears truncated here; the body of the
        # inner step() continues beyond the visible source.
# By default, when an environment variable is used by a remote function, the # initialization code will be rerun at the end of the remote task to ensure # that the state of the variable is not changed by the remote task. However, # the initialization code may be expensive. This case is one example, because # a TensorFlow network is constructed. In this case, we pass in a special # reinitialization function which gets run instead of the original # initialization code. As users, if we pass in custom reinitialization code, # we must ensure that no state is leaked between tasks. def net_reinitialization(net): return net # Register the network with Ray and create an environment variable for it. ray.env.net = ray.EnvironmentVariable(net_initialization, net_reinitialization) # Compute the loss on a batch of data. @ray.remote def loss(theta, xs, ys): net = ray.env.net net.variables.set_flat(theta) return net.loss(xs, ys) # Compute the gradient of the loss on a batch of data. @ray.remote def grad(theta, xs, ys): net = ray.env.net net.variables.set_flat(theta)
plt.figure() stats_name = stats_names[i] stat_means, stat_stds = means[:, i], stds[:, i] plt.errorbar(range(len(stat_means)), stat_means, yerr=stat_stds) plt.title("Driving, Learner: {}, Stat: {}".format(agent_name, stats_name)) plt.xlabel("Number of Iterations") plt.ylabel(stats_name) plt.savefig(os.path.join(FILEPATH, 'stats_{}_{}.png').format(agent_name, stats_name)) def env_init(): return DrivingEnv(render_mode=False, config_filepath=config_filepath) def env_reinit(env): return env ray.env.env = ray.EnvironmentVariable(env_init, env_reinit) def supervisor_init(): return SearchAgent() def supervisor_reinit(spvsr): return spvsr ray.env.supervisor = ray.EnvironmentVariable(supervisor_init, supervisor_reinit) def agent_dart_init(): env = ray.env.env supervisor = ray.env.supervisor return RayDartAgent(DeepLearner(), env, supervisor) def agent_dagger_init():
from actorcritic import ActorCritic NUM_WORKERS = 2 GAMMA = 0.95 ray.init(num_workers=NUM_WORKERS) def env_init(): return gym.make('CartPole-v0') def env_reinit(env): return env ray.env.env = ray.EnvironmentVariable(env_init, env_reinit) def ac_init(): env = ray.env.env hparams = { 'input_size': env.observation_space.shape[0], 'hidden_size': 64, 'num_actions': env.action_space.n, 'learning_rate': 0.001, 'entropy_wt': 0.01 } return ActorCritic(hparams) def ac_reinit(actor_critic):
with t.as_default(): m = GridWorldModel(2, statedim=(2, 1)) m.sess.run(tf.initialize_all_variables()) variables = ray.experimental.TensorFlowVariables(m.loss, m.sess) return m, m.opt, t, variables def gridWorldReinit(m): return m ray.env.gridworld = ray.EnvironmentVariable(gridWorldInit, gridWorldReinit) @ray.remote def ptrain(weights, dataset): m, opt, t, variables = ray.env.gridworld variables.set_weights(weights) with t.as_default(): with tf.variable_scope("optimizer"): return m.train(opt, dataset, 1, 0) return None