# Example #1
    def test_experiment(self):
        """Run two full experiments back-to-back and check each against a
        direct serial simulation.

        The entire run-and-verify sequence was previously duplicated inline;
        it is factored into local helpers and executed twice in a loop.
        Running twice guards against state leaking across env.reset() calls.
        """

        def _run_episode():
            # Step the environment until it reports done and return the
            # final reward vector (one entry per cell line).
            _ = self.env.reset()
            steps = 0
            done = False
            reward = None
            while not done:
                steps += 1
                _, reward, done, _ = self.env.step(self.treatment)
            # The episode must last exactly n_steps steps.
            self.assertEqual(steps, self.n_steps)
            return reward

        def _check_against_simulator(reward):
            # Re-run the same treatment through the plain Simulator and
            # compare its per-cell-line proliferation with the env reward.
            treat = prepare_dict(self.treatment, max_dosage=self.max_dosage)
            for i, line in enumerate(self.cell_lines):
                simulator = Simulator()
                simulator.initialize(line)
                r = simulator.apply_treatment(treat)
                self.assertTrue(np.abs(r - reward[i]) < EPS)

        for _ in range(2):
            _check_against_simulator(_run_episode())
    def test_repeated_evaluation(self):
        """Check that a repeated Evaluator returns the product of the
        per-step relative proliferations of a single treatment.

        The reference value is computed by applying the same treatment
        n_steps times in a fresh Simulator and multiplying the results.
        Leftover debug ``print`` calls were removed.
        """
        SEQUENTIAL_CONFIG = {
            "n_steps": 3,
            "cell_lines": ['DV90', 'HS695T'],
            "objective": TestObjective(),
            "max_dosage": 8000,
            "domain": UnitSimplex(7),
            "scale": "linear"
        }

        repeated_evaluator = Evaluator(SEQUENTIAL_CONFIG,
                                       self.n_envs,
                                       store=True,
                                       repeated=True)
        x = np.array([0.5, 0.5, 0, 0, 0, 0, 0])
        # NOTE(review): the reference treatments are built from TEST_CONFIG
        # while the evaluator runs with SEQUENTIAL_CONFIG — confirm both
        # configs agree on max_dosage/scale, otherwise this compares
        # different dosages.
        treats = [
            prepare_dict(x,
                         max_dosage=TEST_CONFIG["max_dosage"],
                         scale=TEST_CONFIG["scale"])
            for _ in range(self.n_steps)
        ]

        # use evaluator
        # TODO: also assert on the returned objective values (first element).
        _, prolifs = repeated_evaluator.evaluate([x])

        # Expected value: product of relative proliferations over all steps
        # for the second cell line ('HS695T', index 1).
        p = 1
        for i in range(self.n_steps):
            simulator = Simulator()
            simulator.initialize("HS695T")
            p *= simulator.apply_treatment(treats[i])

        self.assertAlmostEqual(prolifs[0][1], p)
        repeated_evaluator.terminate()
# Example #3
    def step(self, action, verbose=False):
        """Run one time step of the environment's dynamics.

        The action is assumed to be a flat numpy array or a list lying in
        the environment's domain.

        :param action: an action provided by the environment
        :return observation: agent's observation of the current environment
        :return reward: amount of reward returned after previous action
        :return done: whether the episode has ended, in which case further
            step() calls will return undefined results
        :return info: contains auxiliary diagnostic information (helpful
            for debugging, and sometimes learning)
        """
        assert self.step_counter < self.n_steps, "Environment has already terminated."
        assert self.domain.contains(
            action), "The provided actions does not belong to the domain."

        treatment = prepare_dict(action,
                                 max_dosage=self.max_dosage,
                                 scale=self.scale)
        # Track the total dosage applied so far; the objective consumes it.
        for drug, dose in treatment.items():
            self.commulative_treatment[drug] += dose

        # Run the same treatment once per cell line via the worker pool.
        jobs = [treatment] * len(self.cell_lines)
        results = self.worker_pool.map(execute_experiment, jobs)
        rel_proliferations = self.sort_by_cell_line(results)

        # NOTE: For now we return the proliferation values as observation
        obs = np.array(rel_proliferations)
        reward = self.objective.eval(rel_proliferations,
                                     self.commulative_treatment)

        self.step_counter += 1
        done = self.step_counter >= self.n_steps
        return obs, reward, done, {}
    def test_evaluate(self):
        """Compare Evaluator output against a direct serial simulation."""
        # NOTE(review): evaluate() is called twice and only the second
        # result is used — presumably this checks that re-evaluation is
        # safe; confirm the intent before simplifying.
        ys, prolifs = self.evaluator.evaluate(self.xs)
        ys, prolifs = self.evaluator.evaluate(self.xs)

        cell_lines = TEST_CONFIG["cell_lines"]
        for i, x in enumerate(self.xs):
            # The objective value must equal the mean proliferation.
            self.assertTrue(np.abs(ys[i] - np.average(prolifs[i])) < EPS)
            total = 0
            for j, line in enumerate(cell_lines):
                treat = prepare_dict(x, max_dosage=TEST_CONFIG["max_dosage"])
                sim = Simulator()
                sim.initialize(line)
                result = sim.apply_treatment(treat)
                # Per-cell-line proliferation must match the simulator.
                self.assertTrue(np.abs(prolifs[i][j] - result) < EPS)
                total += result
            # The averaged simulator results must match the objective value.
            self.assertTrue(np.abs(total / len(cell_lines) - ys[i]) < EPS)
    def test_buffer(self):
        """Check that evaluation results are stored in the buffer."""
        self.evaluator.evaluate(self.xs)
        buffer_dict = self.evaluator.get_res_dict()

        # One stored proliferation entry per evaluated point.
        first_line = TEST_CONFIG["cell_lines"][0]
        self.assertEqual(
            len(buffer_dict[first_line]["relative_proliferation"]), EVALS)

        # Every buffered value must match a direct serial simulation.
        for i, x in enumerate(self.xs):
            for line in TEST_CONFIG["cell_lines"]:
                treat = prepare_dict(x, max_dosage=TEST_CONFIG["max_dosage"])
                sim = Simulator()
                sim.initialize(line)
                expected = sim.apply_treatment(treat)
                stored = buffer_dict[line]["relative_proliferation"][i]
                self.assertTrue(np.abs(expected - stored) <= EPS)