def test_abort_on_all_empty_inputs(self):
    open(self.test_dir + "/empty", "w").close()
    reader = JsonReader([
        self.test_dir + "/empty",
    ])
    self.assertRaises(ValueError, lambda: reader.next())

    with open(self.test_dir + "/empty1", "w") as f:
        for _ in range(100):
            f.write("\n")
    with open(self.test_dir + "/empty2", "w") as f:
        for _ in range(100):
            f.write("\n")
    reader = JsonReader([
        self.test_dir + "/empty1",
        self.test_dir + "/empty2",
    ])
    self.assertRaises(ValueError, lambda: reader.next())
def testAbortOnAllEmptyInputs(self):
    open(self.test_dir + "/empty", "w").close()
    reader = JsonReader([
        self.test_dir + "/empty",
    ])
    self.assertRaises(ValueError, lambda: reader.next())

    with open(self.test_dir + "/empty1", "w") as f:
        for _ in range(100):
            f.write("\n")
    with open(self.test_dir + "/empty2", "w") as f:
        for _ in range(100):
            f.write("\n")
    reader = JsonReader([
        self.test_dir + "/empty1",
        self.test_dir + "/empty2",
    ])
    self.assertRaises(ValueError, lambda: reader.next())
def test_multiple_output_workers(self):
    ray.shutdown()
    ray.init(num_cpus=4, ignore_reinit_error=True)
    for fw in framework_iterator(frameworks=["tf", "torch"]):
        agent = PG(
            env="CartPole-v0",
            config={
                "num_workers": 2,
                "output": self.test_dir + fw,
                "rollout_fragment_length": 250,
                "framework": fw,
            },
        )
        agent.train()
        self.assertEqual(len(os.listdir(self.test_dir + fw)), 2)
        reader = JsonReader(self.test_dir + fw + "/*.json")
        reader.next()
def testAbortOnAllEmptyInputs(self):
    ioctx = IOContext(self.test_dir, {}, 0, None)
    open(self.test_dir + "/empty", "w").close()
    reader = JsonReader(ioctx, [
        self.test_dir + "/empty",
    ])
    self.assertRaises(ValueError, lambda: reader.next())

    with open(self.test_dir + "/empty1", "w") as f:
        for _ in range(100):
            f.write("\n")
    with open(self.test_dir + "/empty2", "w") as f:
        for _ in range(100):
            f.write("\n")
    reader = JsonReader(ioctx, [
        self.test_dir + "/empty1",
        self.test_dir + "/empty2",
    ])
    self.assertRaises(ValueError, lambda: reader.next())
def test_agent_output_infos(self):
    """Verify that the infos dictionary is written to the output files.

    Note, with torch this is always the case.
    """
    output_config = {"store_infos": True}
    for fw in framework_iterator(frameworks=("torch", "tf")):
        self.write_outputs(self.test_dir, fw, output_config=output_config)
        self.assertEqual(len(os.listdir(self.test_dir + fw)), 1)
        reader = JsonReader(self.test_dir + fw + "/*.json")
        data = reader.next()
        assert "infos" in data
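# The agent-output tests above and below call a self.write_outputs(...)
# helper that is not included in this excerpt. A rough sketch of what such a
# helper might look like (the PG trainer and the exact config keys here are
# assumptions for illustration; the real helper may differ):
def write_outputs(self, output, fw, output_config=None):
    agent = PG(
        env="CartPole-v0",
        config={
            # The tests check os.listdir(self.test_dir + fw), so the output
            # path is assumed to be the test dir suffixed with the framework.
            "output": output + fw,
            "rollout_fragment_length": 250,
            "framework": fw,
            "output_config": output_config or {},
        },
    )
    agent.train()
    return agent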
def testReadWrite(self):
    ioctx = IOContext(self.test_dir, {}, 0, None)
    writer = JsonWriter(
        self.test_dir, ioctx, max_file_size=5000, compress_columns=["obs"])
    for i in range(100):
        writer.write(make_sample_batch(i))
    reader = JsonReader(self.test_dir + "/*.json")
    seen_a = set()
    seen_o = set()
    for i in range(1000):
        batch = reader.next()
        seen_a.add(batch["actions"][0])
        seen_o.add(batch["obs"][0])
    self.assertGreater(len(seen_a), 90)
    self.assertLess(len(seen_a), 101)
    self.assertGreater(len(seen_o), 90)
    self.assertLess(len(seen_o), 101)
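# testReadWrite above and the skip/corruption tests below build tiny batches
# with a make_sample_batch(i) helper that is not shown in this excerpt. A
# minimal sketch of such a helper, assuming NumPy-backed SampleBatch columns
# (the exact columns in the real test file may differ):
import numpy as np
from ray.rllib.policy.sample_batch import SampleBatch

def make_sample_batch(i):
    # Encode the index i into the "actions" and "obs" columns so the tests
    # can later check which batches were read back.
    return SampleBatch({
        "actions": np.array([i]),
        "obs": np.array([i]),
    })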
def testSkipsOverCorruptedLines(self):
    with open(self.test_dir + "/f1", "w") as f:
        f.write(_to_json(make_sample_batch(0), []))
        f.write("\n")
        f.write(_to_json(make_sample_batch(1), []))
        f.write("\n")
        f.write(_to_json(make_sample_batch(2), []))
        f.write("\n")
        f.write(_to_json(make_sample_batch(3), []))
        f.write("\n")
        f.write("{..corrupted_json_record")
    reader = JsonReader([
        self.test_dir + "/f1",
    ])
    seen_a = set()
    for i in range(10):
        batch = reader.next()
        seen_a.add(batch["actions"][0])
    self.assertEqual(len(seen_a), 4)
def test_skips_over_empty_lines_and_files(self):
    open(self.test_dir + "/empty", "w").close()
    with open(self.test_dir + "/f1", "w") as f:
        f.write("\n")
        f.write("\n")
        f.write(_to_json(make_sample_batch(0), []))
    with open(self.test_dir + "/f2", "w") as f:
        f.write(_to_json(make_sample_batch(1), []))
        f.write("\n")
    reader = JsonReader([
        self.test_dir + "/empty",
        self.test_dir + "/f1",
        "file://" + self.test_dir + "/f2",
    ])
    seen_a = set()
    for i in range(100):
        batch = reader.next()
        seen_a.add(batch["actions"][0])
    self.assertEqual(len(seen_a), 2)
def test_skips_over_corrupted_lines(self):
    with open(self.test_dir + "/f1", "w") as f:
        f.write(_to_json(make_sample_batch(0), []))
        f.write("\n")
        f.write(_to_json(make_sample_batch(1), []))
        f.write("\n")
        f.write(_to_json(make_sample_batch(2), []))
        f.write("\n")
        f.write(_to_json(make_sample_batch(3), []))
        f.write("\n")
        f.write("{..corrupted_json_record")
    reader = JsonReader([
        self.test_dir + "/f1",
    ])
    seen_a = set()
    for i in range(10):
        batch = reader.next()
        seen_a.add(batch["actions"][0])
    self.assertEqual(len(seen_a), 4)
def testSkipsOverEmptyLinesAndFiles(self):
    open(self.test_dir + "/empty", "w").close()
    with open(self.test_dir + "/f1", "w") as f:
        f.write("\n")
        f.write("\n")
        f.write(_to_json(make_sample_batch(0), []))
    with open(self.test_dir + "/f2", "w") as f:
        f.write(_to_json(make_sample_batch(1), []))
        f.write("\n")
    reader = JsonReader([
        self.test_dir + "/empty",
        self.test_dir + "/f1",
        "file:" + self.test_dir + "/f2",
    ])
    seen_a = set()
    for i in range(100):
        batch = reader.next()
        seen_a.add(batch["actions"][0])
    self.assertEqual(len(seen_a), 2)
def testSkipsOverCorruptedLines(self):
    ioctx = IOContext(self.test_dir, {}, 0, None)
    with open(self.test_dir + "/f1", "w") as f:
        f.write(_to_json(make_sample_batch(0), []))
        f.write("\n")
        f.write(_to_json(make_sample_batch(1), []))
        f.write("\n")
        f.write(_to_json(make_sample_batch(2), []))
        f.write("\n")
        f.write(_to_json(make_sample_batch(3), []))
        f.write("\n")
        f.write("{..corrupted_json_record")
    reader = JsonReader(ioctx, [
        self.test_dir + "/f1",
    ])
    seen_a = set()
    for i in range(10):
        batch = reader.next()
        seen_a.add(batch["actions"][0])
    self.assertEqual(len(seen_a), 4)
def testSkipsOverEmptyLinesAndFiles(self):
    ioctx = IOContext(self.test_dir, {}, 0, None)
    open(self.test_dir + "/empty", "w").close()
    with open(self.test_dir + "/f1", "w") as f:
        f.write("\n")
        f.write("\n")
        f.write(_to_json(make_sample_batch(0), []))
    with open(self.test_dir + "/f2", "w") as f:
        f.write(_to_json(make_sample_batch(1), []))
        f.write("\n")
    reader = JsonReader(ioctx, [
        self.test_dir + "/empty",
        self.test_dir + "/f1",
        "file:" + self.test_dir + "/f2",
    ])
    seen_a = set()
    for i in range(100):
        batch = reader.next()
        seen_a.add(batch["actions"][0])
    self.assertEqual(len(seen_a), 2)
def testAgentOutputOk(self):
    self.writeOutputs(self.test_dir)
    self.assertEqual(len(os.listdir(self.test_dir)), 1)
    ioctx = IOContext(self.test_dir, {}, 0, None)
    reader = JsonReader(ioctx, self.test_dir + "/*.json")
    reader.next()
def test_agent_output_ok(self):
    for fw in framework_iterator(frameworks=("torch", "tf")):
        self.write_outputs(self.test_dir, fw)
        self.assertEqual(len(os.listdir(self.test_dir + fw)), 1)
        reader = JsonReader(self.test_dir + fw + "/*.json")
        reader.next()
def testAgentOutputOk(self):
    self.writeOutputs(self.test_dir)
    self.assertEqual(len(os.listdir(self.test_dir)), 1)
    reader = JsonReader(self.test_dir + "/*.json")
    reader.next()
class TorchCustomLossModel(TorchModelV2, nn.Module):
    """PyTorch version of the CustomLossModel above."""

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, input_files):
        super().__init__(obs_space, action_space, num_outputs, model_config,
                         name)
        nn.Module.__init__(self)

        self.input_files = input_files
        # Create a new input reader per worker.
        self.reader = JsonReader(self.input_files)
        self.fcnet = TorchFC(
            self.obs_space,
            self.action_space,
            num_outputs,
            model_config,
            name="fcnet")

    @override(ModelV2)
    def forward(self, input_dict, state, seq_lens):
        # Delegate to our FCNet.
        return self.fcnet(input_dict, state, seq_lens)

    @override(ModelV2)
    def custom_loss(self, policy_loss, loss_inputs):
        """Calculates a custom loss on top of the given policy_loss(es).

        Args:
            policy_loss (List[TensorType]): The list of already calculated
                policy losses (as many as there are optimizers).
            loss_inputs (TensorStruct): Struct of np.ndarrays holding the
                entire train batch.

        Returns:
            List[TensorType]: The altered list of policy losses. In case the
                custom loss should have its own optimizer, make sure the
                returned list is one larger than the incoming policy_loss
                list. In case you simply want to mix in the custom loss into
                the already calculated policy losses, return a list of
                altered policy losses (as done in this example below).
        """
        # Get the next batch from our input files.
        batch = self.reader.next()

        # Define a secondary loss by building a graph copy with weight
        # sharing.
        obs = restore_original_dimensions(
            torch.from_numpy(batch["obs"]).float(),
            self.obs_space,
            tensorlib="torch")
        logits, _ = self.forward({"obs": obs}, [], None)

        # You can also add self-supervised losses easily by referencing
        # tensors created during _build_layers_v2(). For example, an
        # autoencoder-style loss can be added as follows:
        # ae_loss = squared_diff(
        #     loss_inputs["obs"], Decoder(self.fcnet.last_layer))
        print("FYI: You can also use these tensors: {}, ".format(loss_inputs))

        # Compute the IL loss.
        action_dist = TorchCategorical(logits, self.model_config)
        imitation_loss = torch.mean(
            -action_dist.logp(torch.from_numpy(batch["actions"])))
        self.imitation_loss_metric = imitation_loss.item()
        self.policy_loss_metric = np.mean([l.item() for l in policy_loss])

        # Add the imitation loss to each already calculated policy loss term.
        # Alternatively (if custom loss has its own optimizer):
        # return policy_loss + [10 * self.imitation_loss]
        return [loss_ + 10 * imitation_loss for loss_ in policy_loss]

    def metrics(self):
        return {
            "policy_loss": self.policy_loss_metric,
            "imitation_loss": self.imitation_loss_metric,
        }
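# A minimal usage sketch for the model above, following RLlib's custom-model
# registration conventions. The model name "custom_loss_model" and the output
# path "/tmp/out" are illustrative assumptions:
from ray.rllib.models import ModelCatalog

ModelCatalog.register_custom_model("custom_loss_model", TorchCustomLossModel)

config = {
    "framework": "torch",
    "model": {
        "custom_model": "custom_loss_model",
        # Forwarded to TorchCustomLossModel.__init__ as the input_files arg.
        "custom_model_config": {"input_files": "/tmp/out"},
    },
}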
def test_marwil_loss_function(self): """ To generate the historic data used in this test case, first run: $ ./train.py --run=PPO --env=CartPole-v0 \ --stop='{"timesteps_total": 50000}' \ --config='{"output": "/tmp/out", "batch_mode": "complete_episodes"}' """ rllib_dir = Path(__file__).parent.parent.parent.parent print("rllib dir={}".format(rllib_dir)) data_file = os.path.join(rllib_dir, "tests/data/cartpole/small.json") print("data_file={} exists={}".format(data_file, os.path.isfile(data_file))) config = marwil.DEFAULT_CONFIG.copy() config["num_workers"] = 0 # Run locally. # Learn from offline data. config["input"] = [data_file] for fw in framework_iterator(config, frameworks=["torch", "tf2"]): reader = JsonReader(inputs=[data_file]) batch = reader.next() trainer = marwil.MARWILTrainer(config=config, env="CartPole-v0") policy = trainer.get_policy() model = policy.model # Calculate our own expected values (to then compare against the # agent's loss output). cummulative_rewards = compute_advantages(batch, 0.0, config["gamma"], 1.0, False, False)["advantages"] if fw == "torch": cummulative_rewards = torch.tensor(cummulative_rewards) batch = policy._lazy_tensor_dict(batch) model_out, _ = model.from_batch(batch) vf_estimates = model.value_function() adv = cummulative_rewards - vf_estimates if fw == "torch": adv = adv.detach().cpu().numpy() adv_squared = np.mean(np.square(adv)) c_2 = 100.0 + 1e-8 * (adv_squared - 100.0) c = np.sqrt(c_2) exp_advs = np.exp(config["beta"] * (adv / c)) logp = policy.dist_class(model_out, model).logp(batch["actions"]) if fw == "torch": logp = logp.detach().cpu().numpy() # Calculate all expected loss components. expected_vf_loss = 0.5 * adv_squared expected_pol_loss = -1.0 * np.mean(exp_advs * logp) expected_loss = \ expected_pol_loss + config["vf_coeff"] * expected_vf_loss # Calculate the algorithm's loss (to check against our own # calculation above). batch.set_get_interceptor(None) postprocessed_batch = policy.postprocess_trajectory(batch) loss_func = marwil.marwil_tf_policy.marwil_loss if fw != "torch" \ else marwil.marwil_torch_policy.marwil_loss loss_out = loss_func(policy, model, policy.dist_class, policy._lazy_tensor_dict(postprocessed_batch)) # Check all components. if fw == "torch": check(policy.v_loss, expected_vf_loss, decimals=4) check(policy.p_loss, expected_pol_loss, decimals=4) else: check(policy.loss.v_loss, expected_vf_loss, decimals=4) check(policy.loss.p_loss, expected_pol_loss, decimals=4) check(loss_out, expected_loss, decimals=3)
def test_marwil_loss_function(self): """ To generate the historic data used in this test case, first run: $ ./train.py --run=PPO --env=CartPole-v0 \ --stop='{"timesteps_total": 50000}' \ --config='{"output": "/tmp/out", "batch_mode": "complete_episodes"}' """ rllib_dir = Path(__file__).parent.parent.parent.parent print("rllib dir={}".format(rllib_dir)) data_file = os.path.join(rllib_dir, "tests/data/cartpole/small.json") print("data_file={} exists={}".format(data_file, os.path.isfile(data_file))) config = (marwil.MARWILConfig().rollouts( num_rollout_workers=0).offline_data(input_=[data_file]) ) # Learn from offline data. for fw, sess in framework_iterator(config, session=True): reader = JsonReader(inputs=[data_file]) batch = reader.next() trainer = config.build(env="CartPole-v0") policy = trainer.get_policy() model = policy.model # Calculate our own expected values (to then compare against the # agent's loss output). cummulative_rewards = compute_advantages(batch, 0.0, config.gamma, 1.0, False, False)["advantages"] if fw == "torch": cummulative_rewards = torch.tensor(cummulative_rewards) if fw != "tf": batch = policy._lazy_tensor_dict(batch) model_out, _ = model(batch) vf_estimates = model.value_function() if fw == "tf": model_out, vf_estimates = policy.get_session().run( [model_out, vf_estimates]) adv = cummulative_rewards - vf_estimates if fw == "torch": adv = adv.detach().cpu().numpy() adv_squared = np.mean(np.square(adv)) c_2 = 100.0 + 1e-8 * (adv_squared - 100.0) c = np.sqrt(c_2) exp_advs = np.exp(config.beta * (adv / c)) dist = policy.dist_class(model_out, model) logp = dist.logp(batch["actions"]) if fw == "torch": logp = logp.detach().cpu().numpy() elif fw == "tf": logp = sess.run(logp) # Calculate all expected loss components. expected_vf_loss = 0.5 * adv_squared expected_pol_loss = -1.0 * np.mean(exp_advs * logp) expected_loss = expected_pol_loss + config.vf_coeff * expected_vf_loss # Calculate the algorithm's loss (to check against our own # calculation above). batch.set_get_interceptor(None) postprocessed_batch = policy.postprocess_trajectory(batch) loss_func = (MARWILTF2Policy.loss if fw != "torch" else MARWILTorchPolicy.loss) if fw != "tf": policy._lazy_tensor_dict(postprocessed_batch) loss_out = loss_func(policy, model, policy.dist_class, postprocessed_batch) else: loss_out, v_loss, p_loss = policy.get_session().run( # policy._loss is create by TFPolicy, and is basically the # loss tensor of the static graph. [ policy._loss, policy._marwil_loss.v_loss, policy._marwil_loss.p_loss, ], feed_dict=policy._get_loss_inputs_dict(postprocessed_batch, shuffle=False), ) # Check all components. if fw == "torch": check(policy.v_loss, expected_vf_loss, decimals=4) check(policy.p_loss, expected_pol_loss, decimals=4) elif fw == "tf": check(v_loss, expected_vf_loss, decimals=4) check(p_loss, expected_pol_loss, decimals=4) else: check(policy._marwil_loss.v_loss, expected_vf_loss, decimals=4) check(policy._marwil_loss.p_loss, expected_pol_loss, decimals=4) check(loss_out, expected_loss, decimals=3)