def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    env = gym.make(args.env_id)

    task_name = get_task_short_name(args)
    logger.configure(dir='log_trpo_cartpole/%s' % task_name)

    def policy_fn(name, ob_space, ac_space, reuse=False):
        return build_policy(env, 'mlp', value_network='copy')

    import logging
    import os.path as osp
    import bench
    env = bench.Monitor(
        env,
        logger.get_dir() and osp.join(logger.get_dir(), "monitor.json"))
    env.seed(args.seed)
    gym.logger.setLevel(logging.WARN)

    args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
    args.log_dir = osp.join(args.log_dir, task_name)

    if args.task == 'train':
        from utils.mujoco_dset import Dset_gym
        expert_observations = np.genfromtxt(
            'expert_data/cartpole/observations.csv')
        expert_actions = np.genfromtxt('expert_data/cartpole/actions.csv',
                                       dtype=np.int32)
        expert_dataset = Dset_gym(inputs=expert_observations,
                                  labels=expert_actions,
                                  randomize=True)
        # expert_dataset = (expert_observations, expert_actions)
        reward_giver = Discriminator(env,
                                     args.adversary_hidden_size,
                                     entcoeff=args.adversary_entcoeff)
        reward_guidance = Guidance(env,
                                   args.policy_hidden_size,
                                   expert_dataset=expert_dataset)
        train(env, args.seed, policy_fn, reward_giver, reward_guidance,
              expert_dataset, args.algo, args.g_step, args.d_step,
              args.policy_entcoeff, args.num_timesteps, args.save_per_iter,
              args.checkpoint_dir, args.log_dir, args.pretrained,
              args.BC_max_iter, args.loss_percent, task_name)
    elif args.task == 'evaluate':
        avg_len, avg_ret = runner(env,
                                  policy_fn,
                                  args.load_model_path,
                                  timesteps_per_batch=1024,
                                  number_trajs=100,
                                  stochastic_policy=args.stochastic_policy,
                                  save=args.save_sample)
        result = np.array([avg_ret, avg_len])
        txt_name = args.load_model_path + 'result.txt'
        np.savetxt(txt_name, result, fmt="%d", delimiter=" ")
        print(args.load_model_path, avg_ret, avg_len)
        print('saved successfully')
    else:
        raise NotImplementedError
    env.close()
Example #2
 def configure_logging(self):
     """Configure the experiment"""
     if self.comm is None or self.rank == 0:
         log_path = self.get_log_path()
         formats_strs = ['stdout', 'log', 'csv']
         fmtstr = "configuring logger"
         if self.comm is not None and self.rank == 0:
             fmtstr += " [master]"
         logger.info(fmtstr)
         logger.configure(dir_=log_path, format_strs=formats_strs)
         fmtstr = "logger configured"
         if self.comm is not None and self.rank == 0:
             fmtstr += " [master]"
         logger.info(fmtstr)
         logger.info("  directory: {}".format(log_path))
         logger.info("  output formats: {}".format(formats_strs))
         # In the same log folder, log the args to a YAML file
         file_logger = FileLogger(uuid=self.uuid,
                                  path=self.get_log_path(),
                                  file_prefix=self.name_prefix)
         file_logger.set_info('note', self.args.note)
         file_logger.set_info('uuid', self.uuid)
         file_logger.set_info('task', self.args.task)
         file_logger.set_info('args', str(self.args))
         fmtstr = "experiment configured"
         if self.comm is not None:
             fmtstr += " [{} MPI workers]".format(self.comm.Get_size())
         logger.info(fmtstr)
     else:
         logger.info("configuring logger [worker #{}]".format(self.rank))
         logger.configure(dir_=None, format_strs=None)
         logger.set_level(logger.DISABLED)
Example #3
def main():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env-id', type=str, default='map')
    parser.add_argument('--num-env', type=int, default=32)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--lr', type=float, default=2.5e-4)
    parser.add_argument('--ent-coef', type=float, default=0.01)
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--num-timesteps', type=int, default=int(40e6))
    parser.add_argument('--next-n', type=int, default=10)
    parser.add_argument('--nslupdates', type=int, default=10)
    parser.add_argument('--nepochs', type=int, default=4)
    parser.add_argument('--seq-len', type=int, default=10)
    parser.add_argument('--K', type=int, default=1)
    parser.add_argument('--log', type=str, default='result/tmp2')
    args = parser.parse_args()
    logger.configure(args.log)
    train(args.env_id,
          num_timesteps=args.num_timesteps,
          seed=args.seed,
          num_env=args.num_env,
          gamma=args.gamma,
          ent_coef=args.ent_coef,
          next_n=args.next_n,
          nslupdates=args.nslupdates,
          nepochs=args.nepochs,
          seq_len=args.seq_len,
          K=args.K)
Example #4
def main(args):
    render = args.render
    if not render:
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
    from utils.utils import TabularPolicy, TabularValueFun
    from part1.tabular_value_iteration import ValueIteration
    from envs import Grid1DEnv, GridWorldEnv
    envs = [GridWorldEnv(seed=0), GridWorldEnv(seed=1)]

    for env in envs:
        env_name = env.__name__
        exp_dir = os.getcwd() + '/data/part1/%s/policy_type%s_temperature%s/' % (env_name, args.policy_type, args.temperature)
        logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])
        args_dict = vars(args)
        args_dict['env'] = env_name
        json.dump(vars(args), open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True)

        policy = TabularPolicy(env)
        value_fun = TabularValueFun(env)
        algo = ValueIteration(env,
                              value_fun,
                              policy,
                              policy_type=args.policy_type,
                              render=render,
                              temperature=args.temperature)
        algo.train()
Example #5
def main():
    # Parse the JSON arguments
    config_args = None
    try:
        config_args = parse_args()
    except:
        print("Add a config file using \'--config file_name.json\'")
        exit(1)

    tf.reset_default_graph()

    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=config_args.num_envs,
                            inter_op_parallelism_threads=config_args.num_envs)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # Prepare Directories
    # TODO: add openai logger
    config_args.experiment_dir, config_args.summary_dir, config_args.checkpoint_dir, config_args.output_dir, config_args.test_dir = \
        create_experiment_dirs(config_args.experiment_dir)
    logger.configure(config_args.experiment_dir)
    logger.info("Print configuration .....")
    logger.info(config_args)

    acktr = ACKTR(sess, config_args)

    if config_args.to_train:
        acktr.train()
    if config_args.to_test:
        acktr.test(total_timesteps=10000000)
Example #6
def main():
    """ Main method.

    Parameters:
        None

    Returns:
        None
    """

    # configure logging
    logger.configure()

    # cli arguments
    args = cli.cli()

    # parse origin
    if isinstance(args.origin, Point):
        origin = args.origin
    else:
        origin = Point.from_string(args.origin)

    # parse neighbors
    neighbors = parse_neighbors(args.neighbors)

    # determine nearest
    nearest_neighbors = nearest(origin, args.number, neighbors)

    # log summary
    summary(origin, nearest_neighbors)

    return
Example #7
def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    env = gym.make(args.env_id)

    task_name = get_task_short_name(args)
    logger.configure(dir='log_trpo_mujoco/%s' % task_name)

    def policy_fn(name, ob_space, ac_space, reuse=False):
        return MlpPolicy(name=name,
                         ob_space=ob_space,
                         ac_space=ac_space,
                         reuse=reuse,
                         hid_size=args.policy_hidden_size,
                         num_hid_layers=2)

    import logging
    import os.path as osp
    import bench
    env = bench.Monitor(
        env,
        logger.get_dir() and osp.join(logger.get_dir(), "monitor.json"))
    env.seed(args.seed)
    gym.logger.setLevel(logging.WARN)

    args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
    args.log_dir = osp.join(args.log_dir, task_name)

    if args.task == 'train':
        expert_dataset = Mujoco_Dset(expert_path=args.expert_path,
                                     traj_limitation=args.traj_limitation)
        reward_giver = Discriminator(env,
                                     args.adversary_hidden_size,
                                     entcoeff=args.adversary_entcoeff)
        reward_guidance = Guidance(env,
                                   args.policy_hidden_size,
                                   expert_dataset=expert_dataset)
        train(env, args.seed, policy_fn, reward_giver, reward_guidance,
              expert_dataset, args.algo, args.g_step, args.d_step,
              args.policy_entcoeff, args.num_timesteps, args.save_per_iter,
              args.checkpoint_dir, args.log_dir, args.pretrained,
              args.BC_max_iter, args.loss_percent, task_name)
    elif args.task == 'evaluate':
        avg_len, avg_ret = runner(env,
                                  policy_fn,
                                  args.load_model_path,
                                  timesteps_per_batch=1024,
                                  number_trajs=100,
                                  stochastic_policy=args.stochastic_policy,
                                  save=args.save_sample)

        result = np.array([avg_ret, avg_len])
        txt_name = args.load_model_path + 'result.txt'
        np.savetxt(txt_name, result, fmt="%d", delimiter=" ")
        print(args.load_model_path, avg_ret, avg_len)
        print('saved successfully')
    else:
        raise NotImplementedError
    env.close()
Example #8
def configure_logger(log_dir):
    logger.configure(log_dir, format_strs=['log'])
    # global # tb
    # # tb = logger.Logger(log_dir, [logger.make_output_format('tensorboard', log_dir),
    #                              logger.make_output_format('csv', log_dir),
    #                              logger.make_output_format('stdout', log_dir)])
    global log
    log = logger.log
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)

    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if args.extra_import is not None:
        import_module(args.extra_import)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        # logger.configure()
        # logger.configure(dir=log_path, format_strs=['stdout', 'log', 'csv', 'tensorboard'])
        logger.configure(dir=log_path, format_strs=['stdout', 'csv'])
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()

        state = model.initial_state if hasattr(model,
                                               'initial_state') else None
        dones = np.zeros((1, ))

        episode_rew = 0
        i = 0
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _, _ = model.step(obs)
                # actions, _, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            i += 1
            if done:
                print(f'episode_rew={episode_rew}')
                print(i)
                episode_rew = 0
                obs = env.reset()

    env.close()
    model.sess.close()
    tf.reset_default_graph()
    return model
Example #10
def test(episodes=20, agent=None, load_path=None, ifrender=False, log=False):
    if log:
        logger.configure(dir="./log/", format_strs=["stdout"])
    if agent is None:
        agent = DQN(num_state=16, num_action=4)
        if load_path:
            agent.load(load_path)
        else:
            agent.load()

    env = Game2048Env()
    score_list = []
    highest_list = []

    for i in range(episodes):
        state, _, done, info = env.reset()
        state = log2_shaping(state)

        start = time.time()
        while True:
            action = agent.select_action(state, deterministic=True)
            next_state, _, done, info = env.step(action)
            next_state = log2_shaping(next_state)
            state = next_state

            if ifrender:
                env.render()

            if done:
                print(env.Matrix)
                if log:
                    logger.logkv('episode number', i + 1)
                    logger.logkv('episode reward', info['score'])
                    logger.logkv('episode steps', info['steps'])
                    logger.logkv('highest', info['highest'])
                    logger.dumpkvs()
                break

        end = time.time()
        if log:
            print('episode time:{} s\n'.format(end - start))

        score_list.append(info['score'])
        highest_list.append(info['highest'])

    print('mean score:{}, mean highest:{}'.format(np.mean(score_list),
                                                  np.mean(highest_list)))
    print('max score:{}, max highest:{}'.format(np.max(score_list),
                                                 np.max(highest_list)))
    result_info = {
        'mean': np.mean(score_list),
        'max': np.max(score_list),
        'list': score_list
    }
    print(highest_list)
    return result_info
Example #11
def main(args):
    render = args.render
    if not render:
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
    from envs import DoubleIntegratorEnv, MountainCarEnv, CartPoleEnv, SwingUpEnv
    from utils.utils import TabularPolicy, TabularValueFun
    from part1.tabular_value_iteration import ValueIteration
    from part2.look_ahead_policy import LookAheadPolicy
    from part2.discretize import Discretize
    envs = [
        DoubleIntegratorEnv(),
        MountainCarEnv(),
        CartPoleEnv(),
        SwingUpEnv()
    ]

    for env in envs:
        env_name = env.__class__.__name__

        if env_name == 'MountainCarEnv':
            state_discretization = 51
        else:
            state_discretization = 21
        exp_dir = os.getcwd(
        ) + '/data/part2_d/%s/policy_type%s_mode%s_horizon%s/' % (
            env_name, args.policy_type, args.mode, args.horizon)
        logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])
        args_dict = vars(args)
        args_dict['env'] = env_name
        json.dump(vars(args),
                  open(exp_dir + '/params.json', 'w'),
                  indent=2,
                  sort_keys=True)

        env = Discretize(env,
                         state_discretization=state_discretization,
                         mode=args.mode)
        value_fun = TabularValueFun(env)
        if args.policy_type == 'tabular':
            policy = TabularPolicy(env)
        elif args.policy_type == 'look_ahead':
            policy = LookAheadPolicy(env, value_fun, args.horizon)
        else:
            raise NotImplementedError
        algo = ValueIteration(env,
                              value_fun,
                              policy,
                              render=render,
                              max_itr=args.max_iter,
                              num_rollouts=1,
                              render_itr=5,
                              log_itr=5)
        algo.train()
Example #12
def configure_logger(log_dir):
    logger.configure(log_dir, format_strs=['log'])
    global tb
    tb = logger.Logger(log_dir, [
        logger.make_output_format('tensorboard', log_dir),
        logger.make_output_format('csv', log_dir),
        logger.make_output_format('stdout', log_dir)
    ])
    global log
    logger.set_level(60)
    log = logger.log
Example #13
def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    from dp_env_v3 import DPEnv
    env = DPEnv()

    task_name = get_task_short_name(args)

    if rank == 0:
        logger.configure(dir='log_gail/%s' % task_name)
    if rank != 0:
        logger.set_level(logger.DISABLED)

    def policy_fn(name, ob_space, ac_space, reuse=False):
        return MlpPolicy(name=name,
                         ob_space=ob_space,
                         ac_space=ac_space,
                         reuse=reuse,
                         hid_size=args.policy_hidden_size,
                         num_hid_layers=2)

    import logging
    import os.path as osp
    import bench
    env = bench.Monitor(
        env,
        logger.get_dir() and osp.join(logger.get_dir(), "monitor.json"))
    env.seed(args.seed)
    gym.logger.setLevel(logging.WARN)
    task_name = get_task_name(args)
    args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
    args.log_dir = osp.join(args.log_dir, task_name)

    if args.task == 'train':
        dataset = Mujoco_Dset(expert_path=args.expert_path,
                              traj_limitation=args.traj_limitation)
        reward_giver = TransitionClassifier(env,
                                            args.adversary_hidden_size,
                                            entcoeff=args.adversary_entcoeff)
        train(env, args.seed, policy_fn, reward_giver, dataset, args.algo,
              args.g_step, args.d_step, args.policy_entcoeff,
              args.num_timesteps, args.save_per_iter, args.checkpoint_dir,
              args.log_dir, args.pretrained, args.BC_max_iter, task_name)
    elif args.task == 'evaluate':
        runner(env,
               policy_fn,
               args.load_model_path,
               timesteps_per_batch=1024,
               number_trajs=10,
               stochastic_policy=args.stochastic_policy,
               save=args.save_sample)
    else:
        raise NotImplementedError
    env.close()
Example #14
def main():
    """
    Run the atari test
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', help='environment ID', default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--prioritized-replay-alpha', type=float, default=0.6)
    parser.add_argument('--num-timesteps', type=int, default=int(1e7))

    args = parser.parse_args()
    logger.configure()
    set_global_seeds(args.seed)
    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = wrap_atari_dqn(env)
    policy = partial(CnnPolicy, dueling=args.dueling == 1)

    # model = DQN(
    #     env=env,
    #     policy=policy,
    #     learning_rate=1e-4,
    #     buffer_size=10000,
    #     exploration_fraction=0.1,
    #     exploration_final_eps=0.01,
    #     train_freq=4,
    #     learning_starts=10000,
    #     target_network_update_freq=1000,
    #     gamma=0.99,
    #     prioritized_replay=bool(args.prioritized),
    #     prioritized_replay_alpha=args.prioritized_replay_alpha,
    # )
    model = DQN(
        env=env,
        policy_class=CnnPolicy,
        learning_rate=1e-4,
        buffer_size=10000,
        double_q=False,
        prioritized_replay=True,
        prioritized_replay_alpha=0.6,
        dueling=True,
        train_freq=4,
        learning_starts=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        target_network_update_freq=1000,
        model_path='atari_Breakout_duel'
    )
    # model.learn(total_timesteps=args.num_timesteps, seed=args.seed)
    model.load('atari_Breakout_duel')
    model.evaluate(100)
    env.close()
def main_test(args, extra_args, save_path):
    # configure logger, disable logging in child MPI processes (with rank > 0)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = test(args, extra_args, save_path)
Example #16
File: gdqn.py  Project: isyuanyin/SHA-KG
def configure_logger(log_dir):
    print("save log at: {}".format(log_dir))
    logger.configure(log_dir, format_strs=['log'])
    global tb
    tb = logger.Logger(log_dir, [
        logger.make_output_format('tensorboard', log_dir),
        logger.make_output_format('csv', log_dir),
        logger.make_output_format('stdout', log_dir)
    ])
    global log
    log = logger.log
Example #17
    def learn(self,
              total_timesteps,
              log_interval=5,
              reward_target=None,
              log_to_file=False):
        """
        Initiate the training of the algorithm.

        :param total_timesteps: (int)   total number of timesteps the agent is to run for
        :param log_interval: (int)      how often to perform logging
        :param reward_target: (int)     reaching the reward target stops training early
        :param log_to_file: (bool)      specify whether output ought to be logged
        """
        logger.configure("ICM", self.env_id, log_to_file)
        start_time = time.time()
        iteration = 0

        while self.num_timesteps < total_timesteps:
            progress = round(self.num_timesteps / total_timesteps * 100, 2)
            self.collect_samples()

            iteration += 1
            if log_interval is not None and iteration % log_interval == 0:
                logger.record("Progress", str(progress) + '%')
                logger.record("time/total timesteps", self.num_timesteps)
                if len(self.ep_info_buffer) > 0 and len(
                        self.ep_info_buffer[0]) > 0:
                    logger.record(
                        "rollout/ep_rew_mean",
                        np.mean(
                            [ep_info["r"] for ep_info in self.ep_info_buffer]))
                    logger.record("rollout/num_episodes", self.num_episodes)
                fps = int(self.num_timesteps / (time.time() - start_time))
                logger.record("time/total_time", (time.time() - start_time))
                logger.dump(step=self.num_timesteps)

            self.train()

            if reward_target is not None and np.mean(
                [ep_info["r"]
                 for ep_info in self.ep_info_buffer]) > reward_target:
                logger.record("time/total timesteps", self.num_timesteps)
                if len(self.ep_info_buffer) > 0 and len(
                        self.ep_info_buffer[0]) > 0:
                    logger.record(
                        "rollout/ep_rew_mean",
                        np.mean(
                            [ep_info["r"] for ep_info in self.ep_info_buffer]))
                    logger.record("rollout/num_episodes", self.num_episodes)
                fps = int(self.num_timesteps / (time.time() - start_time))
                logger.record("time/total_time", (time.time() - start_time))
                logger.dump(step=self.num_timesteps)
                break
        return self
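A minimal usage sketch for the learn() method above; the agent class and its constructor are assumptions (not taken from this example), and only the learn() keyword arguments mirror the signature shown.

# Hypothetical driver code; ICMAgent is an assumed class exposing the learn() method above.
agent = ICMAgent(env_id='CartPole-v1')          # assumed constructor
agent.learn(total_timesteps=100_000,            # total environment steps to collect
            log_interval=5,                     # log every 5 training iterations
            reward_target=475,                  # stop early once this mean episode reward is reached
            log_to_file=True)                   # also write the log output to a file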
Example #18
def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    # from dp_env_v2 import DPEnv
    from dp_env_v3 import DPEnv
    # from dp_env_test import DPEnv
    env = DPEnv()
    # env = gym.make('Humanoid-v2')

    task_name = get_task_short_name(args)

    def policy_fn(name, ob_space, ac_space, reuse=False):
        return MlpPolicy(name=name,
                         ob_space=ob_space,
                         ac_space=ac_space,
                         reuse=reuse,
                         hid_size=args.policy_hidden_size,
                         num_hid_layers=2)

    if args.task == 'train':
        import logging
        import os.path as osp
        import bench
        if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
            logger.configure(dir='log_tmp/%s' % task_name)
        else:
            logger.set_level(logger.DISABLED)
        env = bench.Monitor(env, logger.get_dir() and
                            osp.join(logger.get_dir(), "monitor.json"))
        env.seed(args.seed)
        gym.logger.setLevel(logging.WARN)
        task_name = get_task_short_name(args)
        args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
        args.log_dir = osp.join(args.log_dir, task_name)

        train(env,
              args.seed,
              policy_fn,
              args.g_step,
              args.policy_entcoeff,
              args.pretrained_weight_path,
              args.num_timesteps,
              args.save_per_iter,
              args.checkpoint_dir,
              args.log_dir,
              task_name)
    elif args.task == 'evaluate':
        runner(env,
               policy_fn,
               args.load_model_path,
               timesteps_per_batch=1024,
               number_trajs=100,
               stochastic_policy=args.stochastic_policy,
               save=args.save_sample)
    else:
        raise NotImplementedError
    env.close()
Example #19
def run_train(params, exp_name):
    for seed in params["random_seeds"]:
        # set seed
        print("Using random seed {}".format(seed))
        set_seed(seed)

        # logger
        exp_dir = get_exp_name(exp_name, seed)
        logger.configure(exp_dir)
        logger.info("Print configuration .....")
        logger.info(params)
        train(params)

    return
Example #20
def main(args):
    render = args.render
    if not render:
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
    from utils.utils import TabularPolicy
    from utils.value_function import TabularValueFun
    from algos.tabular_value_iteration import ValueIteration
    from envs import ASRSEnv, TabularEnv, ProbDistEnv, DynamicProbEnv, StaticOrderProcess, SeasonalOrderProcess

    num_products = np.array(eval(args.storage_shape)).prod()
    assert (eval(args.dist_param) is None) or (num_products == len(
        eval(args.dist_param)
    )), 'storage_shape should be consistent with dist_param length'
    op = StaticOrderProcess(num_products=num_products,
                            dist_param=eval(args.dist_param))

    base_env = ASRSEnv(eval(args.storage_shape),
                       order_process=op,
                       origin_coord=eval(args.exit_coord))

    env = TabularEnv(base_env)

    env_name = env.__name__
    exp_dir = os.getcwd(
    ) + '/data/version1/%s/policy_type%s_temperature%s_envsize_%s/' % (
        env_name, args.policy_type, args.temperature,
        np.array(eval(args.storage_shape)).prod())
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     level=eval(args.logger_level))
    args_dict = vars(args)
    args_dict['env'] = env_name
    json.dump(vars(args),
              open(exp_dir + '/params.json', 'w'),
              indent=2,
              sort_keys=True)

    policy = TabularPolicy(env)
    value_fun = TabularValueFun(env)
    algo = ValueIteration(env,
                          value_fun,
                          policy,
                          policy_type=args.policy_type,
                          render=render,
                          temperature=args.temperature,
                          num_rollouts=args.num_rollouts)
    algo.train()
    value_fun.save(f'{exp_dir}/value_fun.npy')
Example #21
def configure_logger(log_dir, add_tb=1, add_wb=1, args=None):
    logger.configure(log_dir, format_strs=['log'])
    global tb
    log_types = [
        logger.make_output_format('log', log_dir),
        logger.make_output_format('json', log_dir),
        logger.make_output_format('stdout', log_dir)
    ]
    if add_tb: log_types += [logger.make_output_format('tensorboard', log_dir)]
    if add_wb:
        log_types += [logger.make_output_format('wandb', log_dir, args=args)]
    tb = logger.Logger(log_dir, log_types)
    global log
    log = logger.log
Example #22
def main():
    logger.configure('logs/simulate')
    global T, n_bills, n_taxis, occupied
    results = []
    for n_lanes in range(2, 10):
        bills, n_taxis_left, n_passengers_left = [], [], []
        for seed in range(N_RUNS):
            np.random.seed(seed)
            occupied = [False for _ in range(n_lanes + 1)]
            T, n_bills, n_taxis, sta = 0, 0, 0, 0
            lanes = [
                Lane(i, n_lanes + 1, lam=0.1 / n_lanes) for i in range(n_lanes)
            ]
            enter = np.random.poisson(0.1, size=10000)
            while T < 10000:
                if sta == 0:
                    if n_taxis < M:
                        n_taxis += enter[T]
                    else:
                        sta = 1
                elif n_taxis < N:
                    sta = 0
                for lane in lanes:
                    lane.step()
                T += 1
            bills.append(n_bills)
            n_taxis_left.append(n_taxis)
            n_passengers_left.append(
                np.sum([lane.n_passengers for lane in lanes]))

        results.append(bills)

        logger.record_tabular('lanes', n_lanes)
        logger.record_tabular('bills mean', np.mean(bills))
        logger.record_tabular('bills std', np.std(bills))
        logger.record_tabular('taxis mean', np.mean(n_taxis_left))
        logger.record_tabular('passengers mean', np.mean(n_passengers_left))
        logger.dump_tabular()

    df = pd.DataFrame(np.reshape(results, -1)).rename(columns={0: '# bills'})
    df.insert(0, '# lanes', [i for i in range(2, 10) for _ in range(N_RUNS)],
              True)
    sns.boxplot(x='# lanes',
                y='# bills',
                data=df,
                showmeans=True,
                meanline=True)
    plt.grid(linestyle='--')
    plt.savefig('logs/simulate/boxplot.jpg')
    plt.show()
Example #23
def main(args):
    render = args.render
    if not render:
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
    from utils.utils import TabularPolicy, LookAheadPolicy, SimpleMaxPolicy
    from utils.value_function import CNNValueFun, FFNNValueFun, TabularValueFun
    from algos.function_approximate_value_iteration import FunctionApproximateValueIteration
    from envs import ASRSEnv, ProbDistEnv

    assert np.array(eval(args.storage_shape)).prod() == len(
        eval(args.dist_param)
    ), 'storage_shape should be consistent with dist_param length'
    env = ProbDistEnv(
        ASRSEnv(eval(args.storage_shape),
                origin_coord=eval(args.exit_coord),
                dist_param=eval(args.dist_param)))

    env_name = env.__name__
    exp_dir = os.getcwd() + '/data/version3/%s/policy_type%s_envsize_%s/' % (
        env_name, args.policy_type, np.array(eval(args.storage_shape)).prod())
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     level=eval(args.logger_level))
    args_dict = vars(args)
    args_dict['env'] = env_name
    json.dump(vars(args),
              open(exp_dir + '/params.json', 'w'),
              indent=2,
              sort_keys=True)

    value_fun = FFNNValueFun(env)
    policy = SimpleMaxPolicy(env, value_fun, num_acts=args.num_acts)
    # policy = LookAheadPolicy(env,
    #                         value_fun,
    #                         horizon=args.horizon,
    #                         look_ahead_type=args.policy_type,
    #                         num_acts=args.num_acts)
    algo = FunctionApproximateValueIteration(env,
                                             value_fun,
                                             policy,
                                             learning_rate=args.learning_rate,
                                             batch_size=args.batch_size,
                                             num_acts=args.num_acts,
                                             render=render,
                                             num_rollouts=args.num_rollouts,
                                             max_itr=args.max_iter,
                                             log_itr=5)
    algo.train()
Example #24
def test(test_fn, constants):
    """Runs tests based on the provided function."""
    logger.configure(constants.VERBOSE)

    try:
        test_fn()
        logging.info("Test Passed")
    except Exception as e:
        logging.error("Test Failed")
        logging.exception(e)

        if hasattr(e, "output"):
            logging.error("Exception output: %s", e.output)
    finally:
        sql_wrapper.call_drop_database(constants)
Example #25
    def __init__(self,
                 url,
                 headers,
                 payload,
                 query_filter=None,
                 max_cost_points=1000,
                 leak_rate=50,
                 max_retries=5):
        """Constructor for GraphQL/Shopify request

        Args:
            url (String): Shopify GraphQL API URL
            headers (dict): HTTP headers
            payload (str): Query/Mutation string
            query_filter (str, optional): [description]. Defaults to None.
            max_cost_points (int, optional): [description]. Defaults to 1000.
            leak_rate (int, optional): [description]. Defaults to 50.
            max_retries (int, optional): [description]. Defaults to 5.
        """
        super().__init__(max_retries=max_retries)
        self.log = logger.configure("default")
        self._url = url
        self._headers = headers
        self._payload = payload
        self._query_filter = query_filter
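A construction sketch for the request wrapper above; the class name ShopifyGraphQLRequest is hypothetical (only the constructor parameters come from the docstring), and the URL and token values are placeholders.

# Hypothetical usage; only the constructor arguments mirror the __init__ above.
request = ShopifyGraphQLRequest(
    url='https://example.myshopify.com/admin/api/graphql.json',   # placeholder shop URL
    headers={'Content-Type': 'application/json',
             'X-Shopify-Access-Token': '<access-token>'},         # placeholder credentials
    payload='{ shop { name } }',   # GraphQL query string
    query_filter=None,
    max_cost_points=1000,          # throttle budget, matching the docstring defaults
    leak_rate=50,
    max_retries=5)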
Example #26
def create_gvgai_environment(env_id):
    from common.atari_wrappers import wrap_deepmind, make_atari, ActionDirectionEnv
    initial_direction = {'gvgai-testgame1': 3, 'gvgai-testgame2': 3}
    logger.configure()
    game_name = env_id.split('-lvl')[0]
    does_need_action_direction = False

    # Environment creation
    env = make_atari(env_id)
    env = bench.Monitor(env, logger.get_dir())
    env = wrap_deepmind(env, episode_life=False, clip_rewards=False, frame_stack=False, scale=True)
    if game_name in initial_direction:
        print("We should model with action direction")
        env = ActionDirectionEnv(env, initial_direction=initial_direction[game_name])
        does_need_action_direction = True
    return env, does_need_action_direction, game_name
Example #27
def main(args):
    render = args.render
    if not render:
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
    from envs import DoubleIntegratorEnv, MountainCarEnv, CartPoleEnv, SwingUpEnv
    from utils.utils import VectorizeMujocoEnv
    from part3.look_ahead_policy import LookAheadPolicy
    from utils.value_functions import MLPValueFun
    from part3.continous_value_iteration import ContinousStateValueIteration
    envs = [
        DoubleIntegratorEnv(),
        MountainCarEnv(),
        CartPoleEnv(),
        SwingUpEnv()
    ]

    for env in envs:
        env_name = env.__class__.__name__
        exp_dir = os.getcwd() + '/data/part3_b/%s/horizon%s' % (env_name,
                                                                args.horizon)
        logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])
        args_dict = vars(args)
        args_dict['env'] = env_name
        json.dump(vars(args),
                  open(exp_dir + '/params.json', 'w'),
                  indent=2,
                  sort_keys=True)

        value_fun = MLPValueFun(env, hidden_sizes=(512, 512, 512))
        policy = LookAheadPolicy(env,
                                 value_fun,
                                 horizon=args.horizon,
                                 look_ahead_type=args.policy_type,
                                 num_acts=args.num_acts)
        algo = ContinousStateValueIteration(env,
                                            value_fun,
                                            policy,
                                            learning_rate=args.learning_rate,
                                            batch_size=args.batch_size,
                                            num_acts=args.num_acts,
                                            render=args.render,
                                            max_itr=args.max_iter,
                                            log_itr=10)
        algo.train()
Example #28
File: run.py  Project: Caiit/RL_project
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)

    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)
    env.close()

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        # Start at start state
        if args.demo:
            env.starting_positions = get_all_states(args.env)

        obs = env.reset()

        def initialize_placeholders(nlstm=128, **kwargs):
            return np.zeros((args.num_env or 1, 2 * nlstm)), np.zeros((1))

        state, dones = initialize_placeholders(**extra_args)
        while True:
            actions, _, state, _ = model.step(obs, S=state, M=dones)
            obs, _, done, _ = env.step(actions)
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done

            if done:
                obs = env.reset()

        env.close()

    return model
Example #29
def instant_impulse(variant):
    env_name = variant['env_name']
    env = get_env_from_name(env_name)
    env_params = variant['env_params']

    eval_params = variant['eval_params']
    policy_params = variant['alg_params']
    policy_params.update({
        's_bound': env.observation_space,
        'a_bound': env.action_space,
    })

    build_func = get_policy(variant['algorithm_name'])
    if 'Fetch' in env_name or 'Hand' in env_name:
        s_dim = env.observation_space.spaces['observation'].shape[0] \
                + env.observation_space.spaces['achieved_goal'].shape[0] + \
                env.observation_space.spaces['desired_goal'].shape[0]
    else:
        s_dim = env.observation_space.shape[0]
    a_dim = env.action_space.shape[0]
    # d_dim = env_params['disturbance dim']
    policy = build_func(a_dim, s_dim, policy_params)
    # disturber = Disturber(d_dim, s_dim, disturber_params)

    log_path = variant['log_path'] + '/eval/safety_eval'
    variant['eval_params'].update({'magnitude': 0})
    logger.configure(dir=log_path, format_strs=['csv'])
    for magnitude in eval_params['magnitude_range']:
        variant['eval_params']['magnitude'] = magnitude
        diagnostic_dict = evaluation(variant, env, policy)

        string_to_print = ['magnitude', ':', str(magnitude), '|']
        [
            string_to_print.extend(
                [key, ':', str(round(diagnostic_dict[key], 2)), '|'])
            for key in diagnostic_dict.keys()
        ]
        print(''.join(string_to_print))

        logger.logkv('magnitude', magnitude)
        [
            logger.logkv(key, diagnostic_dict[key])
            for key in diagnostic_dict.keys()
        ]
        logger.dumpkvs()
Example #30
File: shanghai.py  Project: gohsyi/taxi
def main():
    logger.configure('logs/shanghai')
    T = []
    for root, dirs, files in os.walk('data/Taxi_070220'):
        for csv in files[:5000]:
            if not csv.startswith('.'):
                df = pd.read_csv(os.path.join(root, csv), header=None)
                df = preprocess_df(df)
                T.append(Taxi(df))

    # draw_Hongqiao()

    logger.info('# long distance from Hongqiao:', np.sum([t.longs for t in T]))
    logger.info('# short distance from Hongqiao:',
                np.sum([t.shorts for t in T]))
    logger.info('# take customer to Hongqiao and stay:',
                np.sum([t.stays for t in T]))
    logger.info('# take customer to Hongqiao and leave:',
                np.sum([t.leaves for t in T]))
    logpath = config.get( "common", "logs" )
    pidfile = config.get( "common", "pidfile" )
except ConfigParser.Error, e:
    print >> sys.stderr, "Error: cannot parse config file"
    exit( 1 )

if not redises:
    print >> sys.stderr, "No redis instances are set in config."
    exit( 1 )

if len( redises ) != len( failovers ):
    print >> sys.stderr, "Incorrect config. Number of redises != number of failovers."
    exit( 1 )

try:    
    logger.configure( logpath )
except Exception, e:
    print >> sys.stderr, "Error while configuring logger:", e
    exit( 1 )

failover_id = str( uuid.uuid1() )
process_start = time.time()

analyzer = PingsAnalyzer( failtime, len( failovers ) )

try:    
    httpserver = HttpServer( port, analyzer )
except socket.error, e:
    print >> sys.stderr, "Error while starting http server:", e
    exit( 1 )
    #p_connector.commit()
    cursor.close()

if __name__ == '__main__':

    conf = json.load(open('conf/batchs.json'))

    # Command line args
    # __doc__ contains the module docstring
    arguments = docopt(__doc__, version=conf['version'])

    if arguments['--debug']:
        conf['log']['level'] = 'DEBUG'

    configure(conf['log']['level_values'][conf['log']['level']],
              conf['log']['dir'], conf['log']['filename'],
              conf['log']['max_filesize'], conf['log']['max_files'])

    # PostgreSQL setup
    try:
        # Connection loading
        logger.debug("dbname='{db}' user='{user}' host='{host}' password='{passw}'".format(
            db=conf['postgresql']['credentials']['db'],
            user=conf['postgresql']['credentials']['user'],
            host=conf['postgresql']['host'],
            passw=conf['postgresql']['credentials']['password']
        ))
        connector = psycopg2.connect("dbname='{db}' user='{user}' host='{host}' password='{passw}'".format(
            db=conf['postgresql']['credentials']['db'],
            user=conf['postgresql']['credentials']['user'],
            host=conf['postgresql']['host'],
Example #33
def run_import(type_doc = None, source_file = None):
    conf = json.load(open('./init-conf.json'))

    # Command line args
    arguments = docopt(__doc__, version=conf['version'])

    configure(conf['log']['level_values'][conf['log']['level']],
              conf['log']['dir'], 
              conf['log']['filename'],
              conf['log']['max_filesize'], 
              conf['log']['max_files'])

    #
    #   Create the mapping
    #

    es_mappings = json.load(open('data/es.mappings.json'))

    # Connect to the business ES server
    try:
        param = [{'host': conf['connectors']['elasticsearch']['host'],
                  'port': conf['connectors']['elasticsearch']['port']}]
        es = Elasticsearch(param)
        logger.info('Connected to ES Server: %s', json.dumps(param))
    except Exception as e:
        logger.error('Connection failed to ES Server : %s', json.dumps(param))
        logger.error(e)

    # Create the target business ES index if it does not already exist
    index = conf['connectors']['elasticsearch']['index']
    if not es.indices.exists(index):
        logger.debug("Index %s does not exist: creating it", index)
        body_create_settings = {
            "settings" : {
                "index" : {
                    "number_of_shards" : conf['connectors']['elasticsearch']['number_of_shards'],
                    "number_of_replicas" : conf['connectors']['elasticsearch']['number_of_replicas']
                },
                "analysis" : {
                    "analyzer": {
                        "lower_keyword": {
                            "type": "custom",
                            "tokenizer": "keyword",
                            "filter": "lowercase"
                        }
                    }
                }
            }
        }
        es.indices.create(index, body=body_create_settings)
        # Wait a couple of seconds to make sure the index has been created before continuing
        time.sleep(2)

        # Create the ES type mappings
        for type_es, properties in es_mappings['georequetes'].items():
            logger.debug("Creating the mapping for doc type %s", type_es)
            es.indices.put_mapping(index=index, doc_type=type_es, body=properties)

        time.sleep(2)

    #
    #   Import the initial data
    #

    # Swallow object used for the data transformation
    swal = Swallow()

    # Try to pick up the parameters from the command-line arguments
    type_doc = arguments['--type_doc'] if not type_doc else type_doc
    source_file = arguments['--source_file'] if not source_file else ('./upload/' + source_file)

    if arguments['--update']:
        if type_doc in ['referentiel_activites', 'referentiel_communes', 'communes', 'activites_connexes']:
            logger.debug("Deleting documents of type %s", type_doc)
            es.indices.delete_mapping(conf['connectors']['elasticsearch']['index'], type_doc)
            time.sleep(1)
            es.indices.put_mapping(index=conf['connectors']['elasticsearch']['index'], doc_type=type_doc, body=es_mappings['georequetes'][type_doc])
            time.sleep(1)

    if arguments['--init']:
        try:
            logger.debug("Deleting documents of type %s", type_doc)
            es.indices.delete_mapping(conf['connectors']['elasticsearch']['index'], type_doc)
            time.sleep(1)
        except TransportError as e:
            logger.info("Document type %s does not exist on index %s", type_doc, conf['connectors']['elasticsearch']['index'])
            pass

        try:
            es.indices.put_mapping(index=conf['connectors']['elasticsearch']['index'], doc_type=type_doc, body=es_mappings['georequetes'][type_doc])
            time.sleep(1)
        except KeyError as e:
            logger.info("No custom mapping was specified for document type %s: automatic mapping.", type_doc)
            pass

    # Read from a file
    if type_doc in ['communes','departements','regions']:
        reader = JsonFileio()
        swal.set_reader(reader, p_file=source_file)
    elif type_doc in ['communes_pj']:
        reader = CSVio()
        swal.set_reader(reader, p_file=source_file, p_delimiter='|')
    elif type_doc in ['requetes']:
        reader = ESio(conf['connectors']['elasticsearch']['host'], 
                  conf['connectors']['elasticsearch']['port'], 
                  conf['connectors']['elasticsearch']['bulk_size'])
        swal.set_reader(reader, p_index='syn_es_data_geo')

    # Write to Elasticsearch
    writer = ESio(conf['connectors']['elasticsearch']['host'],
                  conf['connectors']['elasticsearch']['port'],
                  conf['connectors']['elasticsearch']['bulk_size'])
    swal.set_writer(writer, p_index=conf['connectors']['elasticsearch']['index'], p_timeout=30)

    # Transform the data with the processing function
    swal.set_process(file_to_elasticsearch, p_type=type_doc, p_es_conn=es, p_es_index=conf['connectors']['elasticsearch']['index'], p_arguments=arguments)

    if arguments['--init']:
        logger.debug("Initialization operation")
    elif arguments['--update']:
        logger.debug("Update operation")
    else:
        logger.error("Operation type not defined")

    logger.debug("Indexing on %s the document type %s", conf['connectors']['elasticsearch']['index'], type_doc)
    
    swal.run(1)

    logger.debug("Operation finished for document type %s", type_doc)
Example #34
    env.close()
    logger.info('total runtime: {}s'.format(time.time() - start_time))


def parse_args():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--env-id', type=str, default='HalfCheetah-v1')
    boolean_flag(parser, 'normalize-observations', default=True)
    parser.add_argument('--seed', help='RNG seed', type=int, default=9876)
    parser.add_argument('--batch-size', type=int, default=64)
    parser.add_argument('--actor-lr', type=float, default=1e-4)
    parser.add_argument('--critic-lr', type=float, default=1e-3)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--nb-epochs', type=int, default=500)  # with default settings, perform 1M steps total
    parser.add_argument('--nb-epoch-cycles', type=int, default=20)
    parser.add_argument('--nb-train-steps', type=int, default=50)
    parser.add_argument('--nb-rollout-steps', type=int, default=100)
    parser.add_argument('--noise-type', type=str, default='ou_0.2')  # choices are ou_xx, normal_xx, none
    args = parser.parse_args()

    dict_args = vars(args)
    return dict_args


if __name__ == '__main__':
    args = parse_args()
    logger.configure(dir='/home/nichengzhuo/ddpg_exps_new/results/base.no_mpi.modify/')
    # Run actual script.
    run(**args)
Example #35
File: wsgi_app.py  Project: lvella/guv
import guv
guv.monkey_patch()

import guv.wsgi
import logger

logger.configure()


def app(environ, start_response):
    """
    This is very basic WSGI app useful for testing the performance of guv and guv.wsgi without
    the overhead of a framework such as Flask. However, it can just as easily be any other WSGI app
    callable object, such as a Flask or Bottle app.
    """
    status = '200 OK'
    output = [b'Hello World!']
    content_length = str(len(b''.join(output)))

    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', content_length)]

    start_response(status, response_headers)

    return output


if __name__ == '__main__':
    server_sock = guv.listen(('0.0.0.0', 8001))
    guv.wsgi.serve(server_sock, app)
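A quick client-side check for the WSGI server above, assuming it is running locally on port 8001 as set up in the __main__ block.

# Smoke test; run from a separate process while the server above is listening.
from urllib.request import urlopen

with urlopen('http://127.0.0.1:8001/') as resp:
    body = resp.read()
    print(resp.status, body.decode())   # expected: 200 Hello World!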