Python Space示例，gym.Space Python示例

示例#1

0

显示文件

        emb_graph = tf.tile(emb_graph, [1, tf.shape(emb_node)[1], 1])
        emb_node = tf.concat([emb_node, emb_graph], axis=2)
    return emb_node


#### debug

if __name__ == "__main__":
    adj_np = np.ones((5, 3, 4, 4))
    adj = tf.placeholder(shape=(5, 3, 4, 4), dtype=tf.float32)
    node_feature_np = np.ones((5, 1, 4, 3))
    node_feature = tf.placeholder(shape=(5, 1, 4, 3), dtype=tf.float32)

    ob_space = {}
    atom_type = 5
    ob_space['adj'] = gym.Space(shape=[3, 5, 5])
    ob_space['node'] = gym.Space(shape=[1, 5, atom_type])
    ac_space = gym.spaces.MultiDiscrete([10, 10, 3])
    policy = GCNPolicy(name='policy', ob_space=ob_space, ac_space=ac_space)

    stochastic = True
    env = gym.make('molecule-v0')  # in gym format
    env.init()
    ob = env.reset()

    # ob['adj'] = np.repeat(ob['adj'][None],2,axis=0)
    # ob['node'] = np.repeat(ob['node'][None],2,axis=0)

    print('adj', ob['adj'].shape)
    print('node', ob['node'].shape)
    with tf.Session() as sess:

示例#2

0

显示文件

    def __init__(self,
                 dt=0.01,
                 nt=1000,
                 seed=0,
                 task='no_collision',
                 map_file=None,
                 simulator_conf=None,
                 healthy_reward=1.0,
                 **kwargs):
        """Configure the simulator, the gym spaces and the selected task.

        Args:
            dt: stored as ``self.dt``; also forwarded to the
                velocity-control task definition.
            nt: stored as ``self.nt``; also forwarded to the
                velocity-control task definition.
            seed: forwarded to the velocity-control task definition only.
            task: one of 'velocity_control', 'no_collision' or
                'hovering_control'.
            map_file: handed to ``Quadrotor.load_map`` for the two
                map-based tasks.
            simulator_conf: path of the simulator config file; when None,
                ``config.json`` located next to this module is used.
            healthy_reward: stored as ``self.healthy_reward``.
            **kwargs: accepted, but not read in the code shown here.
        """
        # TODO: other possible tasks: precision_landing
        supported_tasks = ('velocity_control', 'no_collision',
                           'hovering_control')
        assert task in supported_tasks, 'Invalid task setting'

        if simulator_conf is None:
            module_dir = os.path.dirname(__file__)
            simulator_conf = os.path.join(module_dir, 'config.json')
        assert os.path.exists(simulator_conf), \
            'Simulator config file does not exist'

        self.dt = dt
        self.nt = nt
        self.ct = 0
        self.task = task
        self.healthy_reward = healthy_reward
        self.simulator = QuadrotorSim()

        # The simulator config supplies the valid range and the scalar
        # action bounds; each bound is repeated for the 4 action components.
        cfg_dict = self.simulator.get_config(simulator_conf)
        self.valid_range = cfg_dict['range']
        action_low = np.array([cfg_dict['action_space_low']] * 4,
                              dtype='float32')
        action_high = np.array([cfg_dict['action_space_high']] * 4,
                               dtype='float32')
        self.action_space = gym.spaces.Box(
            low=action_low, high=action_high, shape=[4])

        # Names of the state entries, grouped by the sensor they belong to.
        self.body_velocity_keys = ['b_v_x', 'b_v_y', 'b_v_z']
        self.body_position_keys = ['b_x', 'b_y', 'b_z']
        self.accelerator_keys = ['acc_x', 'acc_y', 'acc_z']
        self.gyroscope_keys = ['gyro_x', 'gyro_y', 'gyro_z']
        self.flight_pose_keys = ['pitch', 'roll', 'yaw']
        self.barometer_keys = ['z']
        self.task_velocity_control_keys = [
            'next_target_g_v_x', 'next_target_g_v_y', 'next_target_g_v_z'
        ]

        # Observation size: every sensor group, plus the velocity targets
        # when the task is velocity control.
        always_observed = (
            self.body_velocity_keys,
            self.body_position_keys,
            self.accelerator_keys,
            self.gyroscope_keys,
            self.flight_pose_keys,
            self.barometer_keys,
        )
        obs_dim = sum(len(group) for group in always_observed)
        if self.task == 'velocity_control':
            obs_dim += len(self.task_velocity_control_keys)
        self.observation_space = gym.Space(shape=[obs_dim], dtype='float32')

        self.state = {}
        self.viewer = None
        self.x_offset = 0
        self.y_offset = 0
        self.z_offset = 0
        self.pos_0 = np.zeros(3, dtype=np.float32)

        if self.task == 'velocity_control':
            self.velocity_targets = \
                self.simulator.define_velocity_control_task(dt, nt, seed)
        elif self.task in ('no_collision', 'hovering_control'):
            self.map_matrix = Quadrotor.load_map(map_file)

            # Only for single quadrotor, also mark its start position:
            # the single map cell equal to -1 gives the start offsets and
            # is then reset to 0.
            start_rows, start_cols = np.where(self.map_matrix == -1)
            assert len(start_rows) == 1
            self.y_offset = start_rows[0]
            self.x_offset = start_cols[0]
            self.z_offset = 5.  # TODO: setup a better init height
            self.map_matrix[self.y_offset, self.x_offset] = 0

示例#3

0

显示文件

文件： mols.py 项目： zizai/notebooks

    def __init__(self,
                 dataset_name,
                 logp_ratio=1,
                 qed_ratio=1,
                 sa_ratio=1,
                 reward_step_total=1,
                 is_normalize=0,
                 reward_type='gan',
                 reward_target=0.5,
                 has_scaffold=False,
                 has_feature=False,
                 is_conditional=False,
                 conditional='low',
                 max_action=128,
                 min_action=20,
                 force_final=False):
        """Set up the molecule environment for the given dataset.

        Args:
            dataset_name: key into ``all_datasets``; must also be one of
                the names with an atom vocabulary here ('gdb' or 'zinc').
            logp_ratio, qed_ratio, sa_ratio, reward_step_total: stored
                unchanged on the instance.
            is_normalize: coerced to bool and stored.
            reward_type, reward_target, force_final: stored unchanged.
            has_scaffold: when true, scaffold data is loaded through
                ``load_scaffold()``.
            has_feature: when true, the per-atom feature vocabularies are
                created and the node feature size grows by 6.
            is_conditional: when true, the starting molecule is built from
                a randomly sampled entry of the conditional list.
            conditional: argument passed to ``load_conditional``.
            max_action, min_action: stored unchanged; ``min_action`` also
                enlarges ``max_atom`` for conditional 'zinc' runs.

        Raises:
            ValueError: if ``dataset_name`` is not in ``all_datasets``, or
                if it has no atom vocabulary defined in this method.
        """
        self.dataset = all_datasets.get(dataset_name)
        if self.dataset is None:
            raise ValueError("dataset \"{}\" not found in [{}]".format(
                dataset_name, ", ".join(all_datasets.keys())))

        # Resolve the dataset-specific settings up front. Previously an
        # unlisted dataset name left `possible_atoms` / `self.max_atom`
        # unbound and surfaced later as a NameError.
        if dataset_name == 'gdb':
            possible_atoms = ['C', 'N', 'O', 'S', 'Cl']  # gdb 13
            base_max_atom = 13
        elif dataset_name == 'zinc':
            possible_atoms = ['C', 'N', 'O', 'S', 'P', 'F', 'I', 'Cl',
                              'Br']  # ZINC
            base_max_atom = 38
        else:
            raise ValueError(
                "dataset \"{}\" has no atom vocabulary defined; "
                "expected 'gdb' or 'zinc'".format(dataset_name))

        self.is_normalize = bool(is_normalize)
        self.is_conditional = is_conditional
        self.has_feature = has_feature
        self.reward_type = reward_type
        self.reward_target = reward_target
        self.force_final = force_final

        self.conditional_list = load_conditional(conditional)
        if self.is_conditional:
            # Start from a randomly chosen conditional entry; its first
            # element is parsed as a SMILES string.
            self.conditional = random.sample(self.conditional_list, 1)[0]
            self.mol = Chem.RWMol(Chem.MolFromSmiles(self.conditional[0]))
            Chem.SanitizeMol(self.mol,
                             sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
        else:
            self.mol = Chem.RWMol()

        self.smile_list = []
        if self.has_feature:
            self.possible_formal_charge = np.array([-1, 0, 1])
            self.possible_implicit_valence = np.array([-1, 0, 1, 2, 3, 4])
            self.possible_ring_atom = np.array([True, False])
            self.possible_degree = np.array([0, 1, 2, 3, 4, 5, 6, 7])
            self.possible_hybridization = np.array([
                Chem.rdchem.HybridizationType.SP,
                Chem.rdchem.HybridizationType.SP2,
                Chem.rdchem.HybridizationType.SP3,
                Chem.rdchem.HybridizationType.SP3D,
                Chem.rdchem.HybridizationType.SP3D2
            ],
                                                   dtype=object)
        possible_bonds = [
            Chem.rdchem.BondType.SINGLE, Chem.rdchem.BondType.DOUBLE,
            Chem.rdchem.BondType.TRIPLE
        ]  # , Chem.rdchem.BondType.AROMATIC
        self.atom_type_num = len(possible_atoms)
        self.possible_atom_types = np.array(possible_atoms)
        self.possible_bond_types = np.array(possible_bonds, dtype=object)

        # Node feature size: one slot per atom type, plus 6 extra slots
        # ("the ring feature") when features are enabled. The sizes of the
        # possible_* vocabularies above are not part of this count.
        self.d_n = len(self.possible_atom_types)
        if self.has_feature:
            self.d_n += 6  # 6 is the ring feature

        self.max_action = max_action
        self.min_action = min_action

        # Only conditional 'zinc' runs get the extra `min_action` head-room.
        self.max_atom = base_max_atom + len(possible_atoms)
        if dataset_name == 'zinc' and self.is_conditional:
            self.max_atom += self.min_action

        self.logp_ratio = logp_ratio
        self.qed_ratio = qed_ratio
        self.sa_ratio = sa_ratio
        self.reward_step_total = reward_step_total
        self.action_space = gym.spaces.MultiDiscrete(
            [self.max_atom, self.max_atom, 3, 2])
        self.observation_space = {}
        self.observation_space['adj'] = gym.Space(
            shape=[len(possible_bonds), self.max_atom, self.max_atom])
        self.observation_space['node'] = gym.Space(
            shape=[1, self.max_atom, self.d_n])

        self.counter = 0

        # load scaffold data if necessary
        self.has_scaffold = has_scaffold
        if has_scaffold:
            self.scaffold = load_scaffold()
            self.max_scaffold = 6

        self.level = 0  # for curriculum learning, level starts with 0, and increase afterwards

示例#4

0

显示文件

    def init(self,
             reward_function,
             n_iterations,
             max_iterations,
             max_molecule_size=38,
             possible_atoms=None,
             terminate_on_done=False,
             expected_reward=0.5,
             target_reward=0.5,
             molecule_rollback=False,
             reward_func_sees_all=True):
        """
        Second-stage constructor, kept separate from __init__ because the
        environment is created through gym.make(), which is not given these
        extra parameters.

        Parameters
        ----------
        reward_function : function
            Callable returning the reward as a float
        n_iterations: int
            Number of iterations between interim rewards
        max_iterations: int
            Number of iterations after which the environment stops
        max_molecule_size: int
            Upper bound on the number of atoms in the molecule
        possible_atoms: list[str]
            Element symbols available to the environment; defaults to
            ['C', 'N', 'O', 'S', 'Cl'] when None
        terminate_on_done: boolean
            Whether to terminate once the done flag of the step method
            becomes True
        expected_reward: float
            Value passed on to the RL method chosen by the user
        target_reward: float
            Target reward for the RL method chosen by the user
        molecule_rollback: boolean
            When True, changes to the molecule are rolled back after a
            negative reward
        reward_func_sees_all: boolean
            When False, the reward function only receives the RWMol object
            rather than the whole environment, which reduces the amount of
            information passed to it
        """

        self.possible_atoms = (['C', 'N', 'O', 'S', 'Cl']
                               if possible_atoms is None else possible_atoms)
        bond_type = Chem.rdchem.BondType
        self.possible_bonds = [
            bond_type.SINGLE, bond_type.DOUBLE, bond_type.TRIPLE
        ]
        self.max_molecule_size = max_molecule_size

        # Iteration budget and reward bookkeeping.
        self.n_iterations = n_iterations
        self.max_iterations = max_iterations
        self.interim_reward = 0
        self.cumulative_reward = 0

        # Start from an empty editable molecule.
        self.mol = Chem.RWMol()

        # dim d_n: array of the possible atom symbol strings
        self.possible_atom_types = np.array(self.possible_atoms)
        # dim d_e: array of the possible rdkit.Chem.rdchem.BondType objects
        self.possible_bond_types = np.array(self.possible_bonds, dtype=object)
        self.current_atom_index = None

        self.reward_function = reward_function

        # number of iterations that have occurred so far
        self.step_counter = 0

        atom_type_count = len(self.possible_atom_types)
        bond_type_count = len(self.possible_bonds)
        size = self.max_molecule_size

        self.action_space = gym.spaces.MultiDiscrete(
            [atom_type_count, size, size, bond_type_count])

        # adj:  adjacency matrix, numpy array, dim k x k.
        # edge: edge attribute matrix, numpy array, dim k x k x d_e.
        # node: node attribute matrix, numpy array, dim k x d_n.
        # k: maximum atoms in molecule
        # d_e: possible bond types
        # d_n: possible atom types
        adj_space = gym.Space(shape=[1, size, size])
        edge_space = gym.Space(shape=[bond_type_count, size, size])
        node_space = gym.Space(shape=[1, size, atom_type_count])
        self.observation_space = {
            'adj': adj_space,
            'edge': edge_space,
            'node': node_space,
        }

        self.pymol_window_flag = False
        self.terminate_on_done = terminate_on_done
        self.molecule_rollback = molecule_rollback
        self.reward_func_sees_all = reward_func_sees_all

        self.expected_reward = expected_reward
        self.target_reward = target_reward

        # Create the rendering directory; an "already exists" error is the
        # only OSError that is tolerated.
        try:
            os.makedirs("./pymol_renderings")
        except OSError as err:
            already_exists = err.errno == errno.EEXIST
            if not already_exists:
                raise

示例#5

0

显示文件

文件： molecule.py 项目： mufeili/rl_graph_generation

    def init(self,
             data_type='zinc',
             logp_ratio=1,
             qed_ratio=1,
             sa_ratio=1,
             reward_step_total=1,
             is_normalize=0,
             reward_type='gan',
             reward_target=0.5,
             has_scaffold=False,
             has_feature=False,
             is_conditional=False,
             conditional='low',
             max_action=128,
             min_action=20,
             force_final=False):
        """Own init function, since gym does not support passing arguments.

        Args:
            data_type: dataset to use; only 'zinc' is handled here.
            logp_ratio, qed_ratio, sa_ratio, reward_step_total: stored
                unchanged on the instance.
            is_normalize: coerced to bool and stored.
            reward_type, reward_target, force_final: stored unchanged.
            has_scaffold: when true, scaffold data is loaded through
                ``load_scaffold()``.
            has_feature: when true, the per-atom feature vocabularies are
                created and the node feature size grows by 6.
            is_conditional: when true, the starting molecule is built from
                a randomly sampled entry of the conditional list and
                ``max_atom`` is enlarged by ``min_action``.
            conditional: argument passed to ``load_conditional``.
            max_action, min_action: stored unchanged.

        Raises:
            ValueError: if ``data_type`` is not 'zinc'.
        """
        # Fail fast on unsupported datasets. Previously any other value
        # left `possible_atoms`, `self.max_atom` and `path` unbound and
        # surfaced as a NameError part-way through initialisation.
        if data_type != 'zinc':
            raise ValueError(
                "data_type \"{}\" is not supported; expected 'zinc'".format(
                    data_type))

        self.is_normalize = bool(is_normalize)
        self.is_conditional = is_conditional
        self.has_feature = has_feature
        self.reward_type = reward_type
        self.reward_target = reward_target
        self.force_final = force_final

        self.conditional_list = load_conditional(conditional)
        if self.is_conditional:
            # Start from a randomly chosen conditional entry; its first
            # element is parsed as a SMILES string.
            self.conditional = random.sample(self.conditional_list, 1)[0]
            self.mol = Chem.RWMol(Chem.MolFromSmiles(self.conditional[0]))
            Chem.SanitizeMol(self.mol,
                             sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
        else:
            self.mol = Chem.RWMol()
        self.smile_list = []

        possible_atoms = ['C', 'N', 'O', 'S', 'P', 'F', 'I', 'Cl',
                          'Br']  # ZINC
        if self.has_feature:
            self.possible_formal_charge = np.array([-1, 0, 1])
            self.possible_implicit_valence = np.array([-1, 0, 1, 2, 3, 4])
            self.possible_ring_atom = np.array([True, False])
            self.possible_degree = np.array([0, 1, 2, 3, 4, 5, 6, 7])
            self.possible_hybridization = np.array([
                Chem.rdchem.HybridizationType.SP,
                Chem.rdchem.HybridizationType.SP2,
                Chem.rdchem.HybridizationType.SP3,
                Chem.rdchem.HybridizationType.SP3D,
                Chem.rdchem.HybridizationType.SP3D2
            ],
                                                   dtype=object)
        possible_bonds = [
            Chem.rdchem.BondType.SINGLE, Chem.rdchem.BondType.DOUBLE,
            Chem.rdchem.BondType.TRIPLE
        ]  #, Chem.rdchem.BondType.AROMATIC
        self.atom_type_num = len(possible_atoms)
        self.possible_atom_types = np.array(possible_atoms)
        self.possible_bond_types = np.array(possible_bonds, dtype=object)

        # Node feature size: one slot per atom type, plus 6 extra slots
        # ("the ring feature") when features are enabled. The sizes of the
        # possible_* vocabularies above are not part of this count.
        self.d_n = len(self.possible_atom_types)
        if self.has_feature:
            self.d_n += 6  # 6 is the ring feature

        self.max_action = max_action
        self.min_action = min_action

        # ZINC: conditional runs get `min_action` extra atoms of head-room.
        self.max_atom = 38 + len(possible_atoms)
        if self.is_conditional:
            self.max_atom += self.min_action

        self.logp_ratio = logp_ratio
        self.qed_ratio = qed_ratio
        self.sa_ratio = sa_ratio
        self.reward_step_total = reward_step_total
        self.action_space = gym.spaces.MultiDiscrete(
            [self.max_atom, self.max_atom, 3, 2])
        self.observation_space = {}
        self.observation_space['adj'] = gym.Space(
            shape=[len(possible_bonds), self.max_atom, self.max_atom])
        self.observation_space['node'] = gym.Space(
            shape=[1, self.max_atom, self.d_n])

        self.counter = 0

        ## load expert data
        # The dataset file is looked up in the `dataset` directory that
        # sits beside this module's parent directory.
        cwd = os.path.dirname(__file__)
        path = os.path.join(
            os.path.dirname(cwd), 'dataset',
            '250k_rndm_zinc_drugs_clean_sorted.smi')  # ZINC
        self.dataset = gdb_dataset(path)

        ## load scaffold data if necessary
        self.has_scaffold = has_scaffold
        if has_scaffold:
            self.scaffold = load_scaffold()
            self.max_scaffold = 6

        self.level = 0  # for curriculum learning, level starts with 0, and increase afterwards