Example #1
# Module-level imports assumed by this snippet; GraphEnv and the 'molecule-v0'
# registration are expected to come from the surrounding project (e.g. a
# gym_molecule package), whose exact import path depends on the project layout.
import gym
from mpi4py import MPI
from baselines import logger
from baselines.common import set_global_seeds


def train(args, seed, writer=None):
    from baselines.ppo1 import pposgd_simple_gcn, gcn_policy
    import baselines.common.tf_util as U

    # One process per MPI rank; each rank builds its own single-threaded TF session.
    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    # Only the rank-0 worker writes full logs; the other ranks stay quiet.
    if rank == 0:
        logger.configure()
    else:
        logger.configure(format_strs=[])

    # Derive a distinct seed for every MPI worker.
    workerseed = seed + 10000 * rank
    set_global_seeds(workerseed)
    if args.env == 'molecule':
        env = gym.make('molecule-v0')
        env.init(
            data_type=args.dataset,
            logp_ratio=args.logp_ratio,
            qed_ratio=args.qed_ratio,
            sa_ratio=args.sa_ratio,
            reward_step_total=args.reward_step_total,
            is_normalize=args.normalize_adj,
            reward_type=args.reward_type,
            reward_target=args.reward_target,
            has_feature=bool(args.has_feature),
            is_conditional=bool(args.is_conditional),
            conditional=args.conditional,
            max_action=args.max_action,
            min_action=args.min_action)  # remember to call this after gym.make()!
    elif args.env == 'graph':
        env = GraphEnv()
        env.init(reward_step_total=args.reward_step_total,
                 is_normalize=args.normalize_adj,
                 dataset=args.dataset)  # remember to call this after gym.make()!
    print(env.observation_space)

    def policy_fn(name, ob_space, ac_space):
        return gcn_policy.GCNPolicy(name=name,
                                    ob_space=ob_space,
                                    ac_space=ac_space,
                                    atom_type_num=env.atom_type_num,
                                    args=args)

    env.seed(workerseed)

    # Run the PPO training loop with a linearly annealed schedule.
    pposgd_simple_gcn.learn(args,
                            env,
                            policy_fn,
                            max_timesteps=args.num_steps,
                            timesteps_per_actorbatch=256,
                            clip_param=0.2,
                            entcoeff=0.01,
                            optim_epochs=8,
                            optim_stepsize=args.lr,
                            optim_batchsize=32,
                            gamma=1,
                            lam=0.95,
                            schedule='linear',
                            writer=writer)
    env.close()
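
For reference, here is a hypothetical command-line driver for the train function above. The flag names simply mirror the attributes read from args in the snippet; every default value, and anything pposgd_simple_gcn.learn or GCNPolicy reads from args internally, is an assumption rather than part of the original code.

# Hypothetical entry point; flag names follow the args attributes used in train(),
# all defaults are placeholders.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', default='molecule')          # 'molecule' or 'graph'
    parser.add_argument('--dataset', default='zinc')
    parser.add_argument('--logp_ratio', type=float, default=1.0)
    parser.add_argument('--qed_ratio', type=float, default=1.0)
    parser.add_argument('--sa_ratio', type=float, default=1.0)
    parser.add_argument('--reward_step_total', type=float, default=0.5)
    parser.add_argument('--normalize_adj', type=int, default=0)
    parser.add_argument('--reward_type', default='qed')
    parser.add_argument('--reward_target', type=float, default=0.5)
    parser.add_argument('--has_feature', type=int, default=0)
    parser.add_argument('--is_conditional', type=int, default=0)
    parser.add_argument('--conditional', default='low')
    parser.add_argument('--max_action', type=int, default=128)
    parser.add_argument('--min_action', type=int, default=20)
    parser.add_argument('--num_steps', type=int, default=int(1e6))
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--seed', type=int, default=0)
    args = parser.parse_args()
    train(args, seed=args.seed)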
Example #2
# This variant relies on the same module-level imports as Example #1, plus
# "import os" and "import tensorflow as tf" for the explicit session setup below.
def train(args, seed, writer=None):
    from baselines.ppo1 import pposgd_simple_gcn, gcn_policy
    import baselines.common.tf_util as U
    rank = MPI.COMM_WORLD.Get_rank()
    # sess = U.single_threaded_session()
    # sess.__enter__()
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    # sess = tf.Session(config=tf.ConfigProto(
    #     gpu_options=gpu_options,allow_soft_placement=True, log_device_placement=True))
    # sess.__enter__()
    # config = tf.ConfigProto(
    #     device_count={'GPU': 0}
    # )
    # sess = tf.Session(config=config)
    # sess.__enter__()
    # Cap each worker's share of GPU memory so several MPI processes can share one GPU.
    config = tf.ConfigProto(allow_soft_placement=True)
    # config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.04
    sess = tf.Session(config=config)
    sess.__enter__()

    if rank == 0:
        logger.configure()
    else:
        logger.configure(format_strs=[])
    workerseed = seed + 10000 * rank  # distinct seed for every MPI worker
    set_global_seeds(workerseed)

    # Resolve any pretrained-model path to an absolute path before env.init() uses it.
    if args.model_path != '':
        args.model_path = os.path.abspath(args.model_path)
    if args.env == 'molecule':
        env = gym.make('molecule-v0')
        env.init(data_type=args.dataset,
                 logp_ratio=args.logp_ratio,
                 qed_ratio=args.qed_ratio,
                 sa_ratio=args.sa_ratio,
                 reward_step_total=args.reward_step_total,
                 is_normalize=args.normalize_adj,
                 reward_type=args.reward_type,
                 reward_target=args.reward_target,
                 has_feature=bool(args.has_feature),
                 is_conditional=bool(args.is_conditional),
                 conditional=args.conditional,
                 max_action=args.max_action,
                 min_action=args.min_action,
                 model_path=args.model_path,
                 model2_path=args.model2_path
                 )  # remember to call this after gym.make()!
    elif args.env == 'graph':
        env = GraphEnv()
        env.init(reward_step_total=args.reward_step_total,
                 is_normalize=args.normalize_adj,
                 dataset=args.dataset)  # remember to call this after gym.make()!
    print(env.observation_space)

    def policy_fn(name, ob_space, ac_space):
        return gcn_policy.GCNPolicy(name=name,
                                    ob_space=ob_space,
                                    ac_space=ac_space,
                                    atom_type_num=env.atom_type_num,
                                    args=args)

    env.seed(workerseed)

    # Run the PPO training loop with a linearly annealed schedule.
    pposgd_simple_gcn.learn(args,
                            env,
                            policy_fn,
                            max_timesteps=args.num_steps,
                            timesteps_per_actorbatch=256,
                            clip_param=0.2,
                            entcoeff=0.01,
                            optim_epochs=8,
                            optim_stepsize=args.lr,
                            optim_batchsize=32,
                            gamma=1,
                            lam=0.95,
                            schedule='linear',
                            writer=writer)
    env.close()
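
The main differences in this second variant are the explicit tf.Session with a hard-coded per-process GPU memory fraction (so several MPI workers can share one GPU) and the optional pretrained-model paths passed to env.init. If the number of workers varies, the fraction could be derived from the MPI world size instead; the helper below is only a sketch of that idea, and its name and the 0.9 headroom factor are assumptions, not part of the snippet.

import tensorflow as tf
from mpi4py import MPI

def make_shared_gpu_session(headroom=0.9):
    # Split the GPU's memory roughly evenly across all MPI workers,
    # leaving a little headroom for CUDA/cuDNN overhead.
    n_workers = MPI.COMM_WORLD.Get_size()
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.per_process_gpu_memory_fraction = headroom / max(n_workers, 1)
    return tf.Session(config=config)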