def test_experiment(self):
    """Run two full episodes back to back and validate the rewards.

    The second episode exercises reset(): after resetting, the
    environment must behave exactly like a fresh one. At the end of each
    episode the per-cell-line reward vector is cross-checked against a
    direct serial Simulator run.
    """
    # Two identical episodes in a row; the repeat verifies reset().
    for _ in range(2):
        _ = self.env.reset()
        done = False
        steps = 0
        while not done:
            steps += 1
            _, reward, done, _ = self.env.step(self.treatment)
        # The episode must last exactly n_steps steps.
        self.assertEqual(steps, self.n_steps)
        # Compare the final reward with a regular (serial) simulation.
        treat = prepare_dict(self.treatment, max_dosage=self.max_dosage)
        for i, line in enumerate(self.cell_lines):
            simulator = Simulator()
            simulator.initialize(line)
            r = simulator.apply_treatment(treat)
            self.assertTrue(np.abs(r - reward[i]) < EPS)
def test_repeated_evaluation(self):
    """Check that a repeated Evaluator compounds per-step proliferations.

    Builds a sequential (repeated) evaluator, evaluates one treatment
    vector, and compares the reported proliferation for 'HS695T' against
    the product of n_steps independent single-step Simulator runs.
    """
    SEQUENTIAL_CONFIG = {
        "n_steps": 3,
        "cell_lines": ['DV90', 'HS695T'],
        "objective": TestObjective(),
        "max_dosage": 8000,
        "domain": UnitSimplex(7),
        "scale": "linear"
    }
    repeated_evaluator = Evaluator(SEQUENTIAL_CONFIG,
                                   self.n_envs,
                                   store=True,
                                   repeated=True)
    x = np.array([0.5, 0.5, 0, 0, 0, 0, 0])
    # NOTE(review): the reference treatments are prepared with
    # TEST_CONFIG's max_dosage/scale while the evaluator runs with
    # SEQUENTIAL_CONFIG — confirm both configs agree on these values,
    # otherwise the reference product below is computed for a
    # different dosage scaling than the evaluator uses.
    treats = [
        prepare_dict(x,
                     max_dosage=TEST_CONFIG["max_dosage"],
                     scale=TEST_CONFIG["scale"])
        for _ in range(self.n_steps)
    ]
    # use evaluator
    _, prolifs = repeated_evaluator.evaluate([x])
    # Reference value: product of n_steps independent simulations on
    # the second cell line ('HS695T', index 1 in prolifs[0]).
    p = 1
    for i in range(self.n_steps):
        simulator = Simulator()
        simulator.initialize("HS695T")
        p *= simulator.apply_treatment(treats[i])
    self.assertAlmostEqual(prolifs[0][1], p)
    repeated_evaluator.terminate()
def step(self, action, verbose=False):
    '''
    Run one time step of the environment's dynamics. The action is
    assumed to be a flat numpy array or a list.

    :param action: an action provided by the environment
    :return observation: agent's observation of the current environment
    :return reward: amount of reward returned after previous action
    :return done: whether the episode has ended, in which case further
        step() calls will return undefined results
    :return info: contains auxiliary diagnostic information (helpful
        for debugging, and sometimes learning)
    '''
    assert self.step_counter < self.n_steps, "Environment has already terminated."
    assert self.domain.contains(
        action), "The provided actions does not belong to the domain."

    dose_dict = prepare_dict(action,
                             max_dosage=self.max_dosage,
                             scale=self.scale)
    # Accumulate the total dosage administered over the episode.
    for drug, amount in dose_dict.items():
        self.commulative_treatment[drug] += amount

    # Apply the same treatment to every cell line in parallel.
    job_list = [dose_dict] * len(self.cell_lines)
    raw_results = self.worker_pool.map(execute_experiment, job_list)
    proliferations = self.sort_by_cell_line(raw_results)

    # NOTE: For now we return the proliferation values as observation
    obs = np.array(proliferations)
    reward = self.objective.eval(proliferations,
                                 self.commulative_treatment)

    self.step_counter += 1
    done = self.step_counter >= self.n_steps
    return obs, reward, done, {}
def test_evaluate(self):
    """Compare Evaluator output against direct serial Simulator runs."""
    # evaluate() is called twice in a row — presumably to check that
    # repeated calls on the same evaluator stay consistent
    # (TODO confirm intent); only the second result is inspected.
    ys, prolifs = self.evaluator.evaluate(self.xs)
    ys, prolifs = self.evaluator.evaluate(self.xs)
    for idx, x in enumerate(self.xs):
        # The objective value must equal the mean proliferation.
        self.assertTrue(np.abs(ys[idx] - np.average(prolifs[idx])) < EPS)
        total = 0
        for j, line in enumerate(TEST_CONFIG["cell_lines"]):
            treat = prepare_dict(x, max_dosage=TEST_CONFIG["max_dosage"])
            simulator = Simulator()
            simulator.initialize(line)
            r = simulator.apply_treatment(treat)
            # Each per-line proliferation must match a serial run.
            self.assertTrue(np.abs(prolifs[idx][j] - r) < EPS)
            total += r
        mean = total / len(TEST_CONFIG["cell_lines"])
        self.assertTrue(np.abs(mean - ys[idx]) < EPS)
def test_buffer(self):
    """Verify evaluations are recorded in the evaluator's result buffer."""
    _, _ = self.evaluator.evaluate(self.xs)
    buffer_dict = self.evaluator.get_res_dict()
    # One buffered proliferation entry per evaluated point.
    first_line = TEST_CONFIG["cell_lines"][0]
    self.assertEqual(
        len(buffer_dict[first_line]["relative_proliferation"]), EVALS)
    # Buffer contents must match a direct serial simulation.
    for idx, x in enumerate(self.xs):
        for line in TEST_CONFIG["cell_lines"]:
            treat = prepare_dict(x, max_dosage=TEST_CONFIG["max_dosage"])
            simulator = Simulator()
            simulator.initialize(line)
            prolif = simulator.apply_treatment(treat)
            stored = buffer_dict[line]["relative_proliferation"][idx]
            self.assertTrue(np.abs(prolif - stored) <= EPS)