def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    env = gym.make(args.env_id)
    task_name = get_task_short_name(args)
    logger.configure(dir='log_trpo_cartpole/%s' % task_name)

    def policy_fn(name, ob_space, ac_space, reuse=False):
        return build_policy(env, 'mlp', value_network='copy')

    import logging
    import os.path as osp
    import bench
    env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), "monitor.json"))
    env.seed(args.seed)
    gym.logger.setLevel(logging.WARN)
    args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
    args.log_dir = osp.join(args.log_dir, task_name)

    if args.task == 'train':
        from utils.mujoco_dset import Dset_gym
        expert_observations = np.genfromtxt('expert_data/cartpole/observations.csv')
        expert_actions = np.genfromtxt('expert_data/cartpole/actions.csv', dtype=np.int32)
        expert_dataset = Dset_gym(inputs=expert_observations, labels=expert_actions, randomize=True)
        # expert_dataset = (expert_observations, expert_actions)
        reward_giver = Discriminator(env, args.adversary_hidden_size, entcoeff=args.adversary_entcoeff)
        reward_guidance = Guidance(env, args.policy_hidden_size, expert_dataset=expert_dataset)
        train(env, args.seed, policy_fn, reward_giver, reward_guidance, expert_dataset,
              args.algo, args.g_step, args.d_step, args.policy_entcoeff, args.num_timesteps,
              args.save_per_iter, args.checkpoint_dir, args.log_dir, args.pretrained,
              args.BC_max_iter, args.loss_percent, task_name)
    elif args.task == 'evaluate':
        avg_len, avg_ret = runner(env, policy_fn, args.load_model_path,
                                  timesteps_per_batch=1024, number_trajs=100,
                                  stochastic_policy=args.stochastic_policy, save=args.save_sample)
        result = np.array([avg_ret, avg_len])
        txt_name = args.load_model_path + 'result.txt'
        np.savetxt(txt_name, result, fmt="%d", delimiter=" ")
        print(args.load_model_path, avg_ret, avg_len)
        print('Saved successfully')
    else:
        raise NotImplementedError
    env.close()
def configure_logging(self):
    """Configure the experiment."""
    if self.comm is None or self.rank == 0:
        log_path = self.get_log_path()
        formats_strs = ['stdout', 'log', 'csv']
        fmtstr = "configuring logger"
        if self.comm is not None and self.rank == 0:
            fmtstr += " [master]"
        logger.info(fmtstr)
        logger.configure(dir_=log_path, format_strs=formats_strs)
        fmtstr = "logger configured"
        if self.comm is not None and self.rank == 0:
            fmtstr += " [master]"
        logger.info(fmtstr)
        logger.info("  directory: {}".format(log_path))
        logger.info("  output formats: {}".format(formats_strs))
        # In the same log folder, log the args to a YAML file
        file_logger = FileLogger(uuid=self.uuid,
                                 path=self.get_log_path(),
                                 file_prefix=self.name_prefix)
        file_logger.set_info('note', self.args.note)
        file_logger.set_info('uuid', self.uuid)
        file_logger.set_info('task', self.args.task)
        file_logger.set_info('args', str(self.args))
        fmtstr = "experiment configured"
        if self.comm is not None:
            fmtstr += " [{} MPI workers]".format(self.comm.Get_size())
        logger.info(fmtstr)
    else:
        logger.info("configuring logger [worker #{}]".format(self.rank))
        logger.configure(dir_=None, format_strs=None)
        logger.set_level(logger.DISABLED)
def main():
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env-id', type=str, default='map')
    parser.add_argument('--num-env', type=int, default=32)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--lr', type=float, default=2.5e-4)
    parser.add_argument('--ent-coef', type=float, default=0.01)
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--num-timesteps', type=int, default=int(40e6))
    parser.add_argument('--next-n', type=int, default=10)
    parser.add_argument('--nslupdates', type=int, default=10)
    parser.add_argument('--nepochs', type=int, default=4)
    parser.add_argument('--seq-len', type=int, default=10)
    parser.add_argument('--K', type=int, default=1)
    parser.add_argument('--log', type=str, default='result/tmp2')
    args = parser.parse_args()
    logger.configure(args.log)
    train(args.env_id, num_timesteps=args.num_timesteps, seed=args.seed,
          num_env=args.num_env, gamma=args.gamma, ent_coef=args.ent_coef,
          next_n=args.next_n, nslupdates=args.nslupdates, nepochs=args.nepochs,
          seq_len=args.seq_len, K=args.K)
def main(args):
    render = args.render
    if not render:
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from utils.utils import TabularPolicy, TabularValueFun
    from part1.tabular_value_iteration import ValueIteration
    from envs import Grid1DEnv, GridWorldEnv
    envs = [GridWorldEnv(seed=0), GridWorldEnv(seed=1)]
    for env in envs:
        env_name = env.__name__
        exp_dir = os.getcwd() + '/data/part1/%s/policy_type%s_temperature%s/' % (
            env_name, args.policy_type, args.temperature)
        logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])
        args_dict = vars(args)
        args_dict['env'] = env_name
        json.dump(vars(args), open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True)
        policy = TabularPolicy(env)
        value_fun = TabularValueFun(env)
        algo = ValueIteration(env,
                              value_fun,
                              policy,
                              policy_type=args.policy_type,
                              render=render,
                              temperature=args.temperature)
        algo.train()
def main():
    # Parse the JSON arguments
    config_args = None
    try:
        config_args = parse_args()
    except:
        print("Add a config file using '--config file_name.json'")
        exit(1)
    tf.reset_default_graph()
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=config_args.num_envs,
                            inter_op_parallelism_threads=config_args.num_envs)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    # Prepare directories
    # TODO: add openai logger
    config_args.experiment_dir, config_args.summary_dir, config_args.checkpoint_dir, \
        config_args.output_dir, config_args.test_dir = \
        create_experiment_dirs(config_args.experiment_dir)
    logger.configure(config_args.experiment_dir)
    logger.info("Print configuration .....")
    logger.info(config_args)
    acktr = ACKTR(sess, config_args)
    if config_args.to_train:
        acktr.train()
    if config_args.to_test:
        acktr.test(total_timesteps=10000000)
def main():
    """
    Main method.

    Parameters:
        None

    Returns:
        None
    """
    # configure logging
    logger.configure()
    # cli arguments
    args = cli.cli()
    # parse origin
    if isinstance(args.origin, Point):
        origin = args.origin
    else:
        origin = Point.from_string(args.origin)
    # parse neighbors
    neighbors = parse_neighbors(args.neighbors)
    # determine nearest
    nearest_neighbors = nearest(origin, args.number, neighbors)
    # log summary
    summary(origin, nearest_neighbors)
    return
def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    env = gym.make(args.env_id)
    task_name = get_task_short_name(args)
    logger.configure(dir='log_trpo_mujoco/%s' % task_name)

    def policy_fn(name, ob_space, ac_space, reuse=False):
        return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                         reuse=reuse, hid_size=args.policy_hidden_size,
                         num_hid_layers=2)

    import logging
    import os.path as osp
    import bench
    env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), "monitor.json"))
    env.seed(args.seed)
    gym.logger.setLevel(logging.WARN)
    args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
    args.log_dir = osp.join(args.log_dir, task_name)

    if args.task == 'train':
        expert_dataset = Mujoco_Dset(expert_path=args.expert_path, traj_limitation=args.traj_limitation)
        reward_giver = Discriminator(env, args.adversary_hidden_size, entcoeff=args.adversary_entcoeff)
        reward_guidance = Guidance(env, args.policy_hidden_size, expert_dataset=expert_dataset)
        train(env, args.seed, policy_fn, reward_giver, reward_guidance, expert_dataset,
              args.algo, args.g_step, args.d_step, args.policy_entcoeff, args.num_timesteps,
              args.save_per_iter, args.checkpoint_dir, args.log_dir, args.pretrained,
              args.BC_max_iter, args.loss_percent, task_name)
    elif args.task == 'evaluate':
        avg_len, avg_ret = runner(env, policy_fn, args.load_model_path,
                                  timesteps_per_batch=1024, number_trajs=100,
                                  stochastic_policy=args.stochastic_policy, save=args.save_sample)
        result = np.array([avg_ret, avg_len])
        txt_name = args.load_model_path + 'result.txt'
        np.savetxt(txt_name, result, fmt="%d", delimiter=" ")
        print(args.load_model_path, avg_ret, avg_len)
        print('Saved successfully')
    else:
        raise NotImplementedError
    env.close()
def configure_logger(log_dir):
    logger.configure(log_dir, format_strs=['log'])
    # global tb
    # tb = logger.Logger(log_dir, [logger.make_output_format('tensorboard', log_dir),
    #                              logger.make_output_format('csv', log_dir),
    #                              logger.make_output_format('stdout', log_dir)])
    global log
    log = logger.log
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if args.extra_import is not None:
        import_module(args.extra_import)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        # logger.configure()
        # logger.configure(dir=log_path, format_strs=['stdout', 'log', 'csv', 'tensorboard'])
        logger.configure(dir=log_path, format_strs=['stdout', 'csv'])
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = 0
        i = 0
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _, _ = model.step(obs)
                # actions, _, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            i += 1
            if done:
                print(f'episode_rew={episode_rew}')
                print(i)
                episode_rew = 0
                obs = env.reset()
    env.close()
    model.sess.close()
    tf.reset_default_graph()
    return model
def test(episodes=20, agent=None, load_path=None, ifrender=False, log=False):
    if log:
        logger.configure(dir="./log/", format_strs="stdout")
    if agent is None:
        agent = DQN(num_state=16, num_action=4)
        if load_path:
            agent.load(load_path)
        else:
            agent.load()
    env = Game2048Env()
    score_list = []
    highest_list = []
    for i in range(episodes):
        state, _, done, info = env.reset()
        state = log2_shaping(state)
        start = time.time()
        while True:
            action = agent.select_action(state, deterministic=True)
            next_state, _, done, info = env.step(action)
            next_state = log2_shaping(next_state)
            state = next_state
            if ifrender:
                env.render()
            if done:
                print(env.Matrix)
                if log:
                    logger.logkv('episode number', i + 1)
                    logger.logkv('episode reward', info['score'])
                    logger.logkv('episode steps', info['steps'])
                    logger.logkv('highest', info['highest'])
                    logger.dumpkvs()
                break
        end = time.time()
        if log:
            print('episode time:{} s\n'.format(end - start))
        score_list.append(info['score'])
        highest_list.append(info['highest'])
    print('mean score:{}, mean highest:{}'.format(np.mean(score_list), np.mean(highest_list)))
    print('max score:{}, max highest:{}'.format(np.max(score_list), np.max(highest_list)))
    result_info = {
        'mean': np.mean(score_list),
        'max': np.max(score_list),
        'list': score_list
    }
    print(highest_list)
    return result_info
def main(args):
    render = args.render
    if not render:
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from envs import DoubleIntegratorEnv, MountainCarEnv, CartPoleEnv, SwingUpEnv
    from utils.utils import TabularPolicy, TabularValueFun
    from part1.tabular_value_iteration import ValueIteration
    from part2.look_ahead_policy import LookAheadPolicy
    from part2.discretize import Discretize
    envs = [
        DoubleIntegratorEnv(),
        MountainCarEnv(),
        CartPoleEnv(),
        SwingUpEnv()
    ]
    for env in envs:
        env_name = env.__class__.__name__
        if env_name == 'MountainCarEnv':
            state_discretization = 51
        else:
            state_discretization = 21
        exp_dir = os.getcwd() + '/data/part2_d/%s/policy_type%s_mode%s_horizon%s/' % (
            env_name, args.policy_type, args.mode, args.horizon)
        logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])
        args_dict = vars(args)
        args_dict['env'] = env_name
        json.dump(vars(args), open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True)
        env = Discretize(env, state_discretization=state_discretization, mode=args.mode)
        value_fun = TabularValueFun(env)
        if args.policy_type == 'tabular':
            policy = TabularPolicy(env)
        elif args.policy_type == 'look_ahead':
            policy = LookAheadPolicy(env, value_fun, args.horizon)
        else:
            raise NotImplementedError
        algo = ValueIteration(env,
                              value_fun,
                              policy,
                              render=render,
                              max_itr=args.max_iter,
                              num_rollouts=1,
                              render_itr=5,
                              log_itr=5)
        algo.train()
def configure_logger(log_dir):
    logger.configure(log_dir, format_strs=['log'])
    global tb
    tb = logger.Logger(log_dir, [
        logger.make_output_format('tensorboard', log_dir),
        logger.make_output_format('csv', log_dir),
        logger.make_output_format('stdout', log_dir)
    ])
    global log
    logger.set_level(60)
    log = logger.log
def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    from dp_env_v3 import DPEnv
    env = DPEnv()
    task_name = get_task_short_name(args)
    if rank == 0:
        logger.configure(dir='log_gail/%s' % task_name)
    if rank != 0:
        logger.set_level(logger.DISABLED)

    def policy_fn(name, ob_space, ac_space, reuse=False):
        return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                         reuse=reuse, hid_size=args.policy_hidden_size,
                         num_hid_layers=2)

    import logging
    import os.path as osp
    import bench
    env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), "monitor.json"))
    env.seed(args.seed)
    gym.logger.setLevel(logging.WARN)
    task_name = get_task_name(args)
    args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
    args.log_dir = osp.join(args.log_dir, task_name)

    if args.task == 'train':
        dataset = Mujoco_Dset(expert_path=args.expert_path, traj_limitation=args.traj_limitation)
        reward_giver = TransitionClassifier(env, args.adversary_hidden_size, entcoeff=args.adversary_entcoeff)
        train(env, args.seed, policy_fn, reward_giver, dataset, args.algo,
              args.g_step, args.d_step, args.policy_entcoeff, args.num_timesteps,
              args.save_per_iter, args.checkpoint_dir, args.log_dir,
              args.pretrained, args.BC_max_iter, task_name)
    elif args.task == 'evaluate':
        runner(env, policy_fn, args.load_model_path,
               timesteps_per_batch=1024, number_trajs=10,
               stochastic_policy=args.stochastic_policy, save=args.save_sample)
    else:
        raise NotImplementedError
    env.close()
def main():
    """
    Run the atari test
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', help='environment ID', default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--prioritized-replay-alpha', type=float, default=0.6)
    parser.add_argument('--num-timesteps', type=int, default=int(1e7))
    args = parser.parse_args()
    logger.configure()
    set_global_seeds(args.seed)
    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = wrap_atari_dqn(env)
    policy = partial(CnnPolicy, dueling=args.dueling == 1)
    # model = DQN(
    #     env=env,
    #     policy=policy,
    #     learning_rate=1e-4,
    #     buffer_size=10000,
    #     exploration_fraction=0.1,
    #     exploration_final_eps=0.01,
    #     train_freq=4,
    #     learning_starts=10000,
    #     target_network_update_freq=1000,
    #     gamma=0.99,
    #     prioritized_replay=bool(args.prioritized),
    #     prioritized_replay_alpha=args.prioritized_replay_alpha,
    # )
    model = DQN(
        env=env,
        policy_class=CnnPolicy,
        learning_rate=1e-4,
        buffer_size=10000,
        double_q=False,
        prioritized_replay=True,
        prioritized_replay_alpha=0.6,
        dueling=True,
        train_freq=4,
        learning_starts=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        target_network_update_freq=1000,
        model_path='atari_Breakout_duel'
    )
    # model.learn(total_timesteps=args.num_timesteps, seed=args.seed)
    model.load('atari_Breakout_duel')
    model.evaluate(100)
    env.close()
def main_test(args, extra_args, save_path):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()
    model, env = test(args, extra_args, save_path)
def configure_logger(log_dir):
    print("save log at: {}".format(log_dir))
    logger.configure(log_dir, format_strs=['log'])
    global tb
    tb = logger.Logger(log_dir, [
        logger.make_output_format('tensorboard', log_dir),
        logger.make_output_format('csv', log_dir),
        logger.make_output_format('stdout', log_dir)
    ])
    global log
    log = logger.log
def learn(self, total_timesteps, log_interval=5, reward_target=None, log_to_file=False):
    """
    Initiate the training of the algorithm.

    :param total_timesteps: (int) total number of timesteps the agent is to run for
    :param log_interval: (int) how often to perform logging
    :param reward_target: (int) reaching the reward target stops training early
    :param log_to_file: (bool) specify whether output ought to be logged
    """
    logger.configure("ICM", self.env_id, log_to_file)
    start_time = time.time()
    iteration = 0
    while self.num_timesteps < total_timesteps:
        progress = round(self.num_timesteps / total_timesteps * 100, 2)
        self.collect_samples()
        iteration += 1
        if log_interval is not None and iteration % log_interval == 0:
            logger.record("Progress", str(progress) + '%')
            logger.record("time/total timesteps", self.num_timesteps)
            if len(self.ep_info_buffer) > 0 and len(self.ep_info_buffer[0]) > 0:
                logger.record("rollout/ep_rew_mean",
                              np.mean([ep_info["r"] for ep_info in self.ep_info_buffer]))
            logger.record("rollout/num_episodes", self.num_episodes)
            fps = int(self.num_timesteps / (time.time() - start_time))
            logger.record("time/total_time", (time.time() - start_time))
            logger.dump(step=self.num_timesteps)
        self.train()
        if reward_target is not None and np.mean(
                [ep_info["r"] for ep_info in self.ep_info_buffer]) > reward_target:
            logger.record("time/total timesteps", self.num_timesteps)
            if len(self.ep_info_buffer) > 0 and len(self.ep_info_buffer[0]) > 0:
                logger.record("rollout/ep_rew_mean",
                              np.mean([ep_info["r"] for ep_info in self.ep_info_buffer]))
            logger.record("rollout/num_episodes", self.num_episodes)
            fps = int(self.num_timesteps / (time.time() - start_time))
            logger.record("time/total_time", (time.time() - start_time))
            logger.dump(step=self.num_timesteps)
            break
    return self
def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    # from dp_env_v2 import DPEnv
    from dp_env_v3 import DPEnv
    # from dp_env_test import DPEnv
    env = DPEnv()
    # env = gym.make('Humanoid-v2')
    task_name = get_task_short_name(args)

    def policy_fn(name, ob_space, ac_space, reuse=False):
        return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                         reuse=reuse, hid_size=args.policy_hidden_size,
                         num_hid_layers=2)

    if args.task == 'train':
        import logging
        import os.path as osp
        import bench
        if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
            logger.configure(dir='log_tmp/%s' % task_name)
        if MPI.COMM_WORLD.Get_rank() != 0:
            logger.set_level(logger.DISABLED)
        env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), "monitor.json"))
        env.seed(args.seed)
        gym.logger.setLevel(logging.WARN)
        task_name = get_task_short_name(args)
        args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
        args.log_dir = osp.join(args.log_dir, task_name)
        train(env, args.seed, policy_fn, args.g_step, args.policy_entcoeff,
              args.pretrained_weight_path, args.num_timesteps, args.save_per_iter,
              args.checkpoint_dir, args.log_dir, task_name)
    elif args.task == 'evaluate':
        runner(env, policy_fn, args.load_model_path,
               timesteps_per_batch=1024, number_trajs=100,
               stochastic_policy=args.stochastic_policy, save=args.save_sample)
    else:
        raise NotImplementedError
    env.close()
def run_train(params, exp_name):
    for seed in params["random_seeds"]:
        # set seed
        print("Using random seed {}".format(seed))
        set_seed(seed)
        # logger
        exp_dir = get_exp_name(exp_name, seed)
        logger.configure(exp_dir)
        logger.info("Print configuration .....")
        logger.info(params)
        train(params)
    return
def main(args):
    render = args.render
    if not render:
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from utils.utils import TabularPolicy
    from utils.value_function import TabularValueFun
    from algos.tabular_value_iteration import ValueIteration
    from envs import ASRSEnv, TabularEnv, ProbDistEnv, DynamicProbEnv, StaticOrderProcess, SeasonalOrderProcess
    num_products = np.array(eval(args.storage_shape)).prod()
    assert (eval(args.dist_param) is None) or (num_products == len(eval(args.dist_param))), \
        'storage_shape should be consistent with dist_param length'
    op = StaticOrderProcess(num_products=num_products, dist_param=eval(args.dist_param))
    base_env = ASRSEnv(eval(args.storage_shape), order_process=op, origin_coord=eval(args.exit_coord))
    env = TabularEnv(base_env)
    env_name = env.__name__
    exp_dir = os.getcwd() + '/data/version1/%s/policy_type%s_temperature%s_envsize_%s/' % (
        env_name, args.policy_type, args.temperature,
        np.array(eval(args.storage_shape)).prod())
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     level=eval(args.logger_level))
    args_dict = vars(args)
    args_dict['env'] = env_name
    json.dump(vars(args), open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True)
    policy = TabularPolicy(env)
    value_fun = TabularValueFun(env)
    algo = ValueIteration(env,
                          value_fun,
                          policy,
                          policy_type=args.policy_type,
                          render=render,
                          temperature=args.temperature,
                          num_rollouts=args.num_rollouts)
    algo.train()
    value_fun.save(f'{exp_dir}/value_fun.npy')
def configure_logger(log_dir, add_tb=1, add_wb=1, args=None):
    logger.configure(log_dir, format_strs=['log'])
    global tb
    log_types = [
        logger.make_output_format('log', log_dir),
        logger.make_output_format('json', log_dir),
        logger.make_output_format('stdout', log_dir)
    ]
    if add_tb:
        log_types += [logger.make_output_format('tensorboard', log_dir)]
    if add_wb:
        log_types += [logger.make_output_format('wandb', log_dir, args=args)]
    tb = logger.Logger(log_dir, log_types)
    global log
    log = logger.log
def main():
    logger.configure('logs/simulate')
    global T, n_bills, n_taxis, occupied
    results = []
    for n_lanes in range(2, 10):
        bills, n_taxis_left, n_passengers_left = [], [], []
        for seed in range(N_RUNS):
            np.random.seed(seed)
            occupied = [False for _ in range(n_lanes + 1)]
            T, n_bills, n_taxis, sta = 0, 0, 0, 0
            lanes = [Lane(i, n_lanes + 1, lam=0.1 / n_lanes) for i in range(n_lanes)]
            enter = np.random.poisson(0.1, size=10000)
            while T < 10000:
                if sta == 0:
                    if n_taxis < M:
                        n_taxis += enter[T]
                    else:
                        sta = 1
                elif n_taxis < N:
                    sta = 0
                for lane in lanes:
                    lane.step()
                T += 1
            bills.append(n_bills)
            n_taxis_left.append(n_taxis)
            n_passengers_left.append(np.sum([lane.n_passengers for lane in lanes]))
        results.append(bills)
        logger.record_tabular('lanes', n_lanes)
        logger.record_tabular('bills mean', np.mean(bills))
        logger.record_tabular('bills std', np.std(bills))
        logger.record_tabular('taxis mean', np.mean(n_taxis_left))
        logger.record_tabular('passengers mean', np.mean(n_passengers_left))
        logger.dump_tabular()
    df = pd.DataFrame(np.reshape(results, -1)).rename(columns={0: '# bills'})
    df.insert(0, '# lanes', [i for i in range(2, 10) for _ in range(N_RUNS)], True)
    sns.boxplot(x='# lanes', y='# bills', data=df, showmeans=True, meanline=True)
    plt.grid(linestyle='--')
    plt.savefig('logs/simulate/boxplot.jpg')
    plt.show()
def main(args):
    render = args.render
    if not render:
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from utils.utils import TabularPolicy, LookAheadPolicy, SimpleMaxPolicy
    from utils.value_function import CNNValueFun, FFNNValueFun, TabularValueFun
    from algos.function_approximate_value_iteration import FunctionApproximateValueIteration
    from envs import ASRSEnv, ProbDistEnv
    assert np.array(eval(args.storage_shape)).prod() == len(eval(args.dist_param)), \
        'storage_shape should be consistent with dist_param length'
    env = ProbDistEnv(
        ASRSEnv(eval(args.storage_shape),
                origin_coord=eval(args.exit_coord),
                dist_param=eval(args.dist_param)))
    env_name = env.__name__
    exp_dir = os.getcwd() + '/data/version3/%s/policy_type%s_envsize_%s/' % (
        env_name, args.policy_type, np.array(eval(args.storage_shape)).prod())
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     level=eval(args.logger_level))
    args_dict = vars(args)
    args_dict['env'] = env_name
    json.dump(vars(args), open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True)
    value_fun = FFNNValueFun(env)
    policy = SimpleMaxPolicy(env, value_fun, num_acts=args.num_acts)
    # policy = LookAheadPolicy(env,
    #                          value_fun,
    #                          horizon=args.horizon,
    #                          look_ahead_type=args.policy_type,
    #                          num_acts=args.num_acts)
    algo = FunctionApproximateValueIteration(env,
                                             value_fun,
                                             policy,
                                             learning_rate=args.learning_rate,
                                             batch_size=args.batch_size,
                                             num_acts=args.num_acts,
                                             render=render,
                                             num_rollouts=args.num_rollouts,
                                             max_itr=args.max_iter,
                                             log_itr=5)
    algo.train()
def test(test_fn, constants):
    """Runs tests based on the provided function."""
    logger.configure(constants.VERBOSE)
    try:
        test_fn()
        logging.info("Test Passed")
    except Exception as e:
        logging.error("Test Failed")
        logging.exception(e)
        if hasattr(e, "output"):
            logging.error("Exception output: %s", e.output)
    finally:
        sql_wrapper.call_drop_database(constants)
def __init__(self, url, headers, payload, query_filter=None,
             max_cost_points=1000, leak_rate=50, max_retries=5):
    """Constructor for a GraphQL/Shopify request.

    Args:
        url (String): Shopify GraphQL API URL
        headers (dict): HTTP headers
        payload (str): Query/Mutation string
        query_filter (str, optional): [description]. Defaults to None.
        max_cost_points (int, optional): [description]. Defaults to 1000.
        leak_rate (int, optional): [description]. Defaults to 50.
        max_retries (int, optional): [description]. Defaults to 5.
    """
    super().__init__(max_retries=max_retries)
    self.log = logger.configure("default")
    self._url = url
    self._headers = headers
    self._payload = payload
    self._query_filter = query_filter
def create_gvgai_environment(env_id):
    from common.atari_wrappers import wrap_deepmind, make_atari, ActionDirectionEnv
    initial_direction = {'gvgai-testgame1': 3, 'gvgai-testgame2': 3}
    logger.configure()
    game_name = env_id.split('-lvl')[0]
    does_need_action_direction = False
    # Environment creation
    env = make_atari(env_id)
    env = bench.Monitor(env, logger.get_dir())
    env = wrap_deepmind(env, episode_life=False, clip_rewards=False, frame_stack=False, scale=True)
    if game_name in initial_direction:
        print("We should model with action direction")
        env = ActionDirectionEnv(env, initial_direction=initial_direction[game_name])
        does_need_action_direction = True
    return env, does_need_action_direction, game_name
def main(args):
    render = args.render
    if not render:
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from envs import DoubleIntegratorEnv, MountainCarEnv, CartPoleEnv, SwingUpEnv
    from utils.utils import VectorizeMujocoEnv
    from part3.look_ahead_policy import LookAheadPolicy
    from utils.value_functions import MLPValueFun
    from part3.continous_value_iteration import ContinousStateValueIteration
    envs = [
        DoubleIntegratorEnv(),
        MountainCarEnv(),
        CartPoleEnv(),
        SwingUpEnv()
    ]
    for env in envs:
        env_name = env.__class__.__name__
        exp_dir = os.getcwd() + '/data/part3_b/%s/horizon%s' % (env_name, args.horizon)
        logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])
        args_dict = vars(args)
        args_dict['env'] = env_name
        json.dump(vars(args), open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True)
        value_fun = MLPValueFun(env, hidden_sizes=(512, 512, 512))
        policy = LookAheadPolicy(env,
                                 value_fun,
                                 horizon=args.horizon,
                                 look_ahead_type=args.policy_type,
                                 num_acts=args.num_acts)
        algo = ContinousStateValueIteration(env,
                                            value_fun,
                                            policy,
                                            learning_rate=args.learning_rate,
                                            batch_size=args.batch_size,
                                            num_acts=args.num_acts,
                                            render=args.render,
                                            max_itr=args.max_iter,
                                            log_itr=10)
        algo.train()
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)
    env.close()

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        # Start at start state
        if args.demo:
            env.starting_positions = get_all_states(args.env)
        obs = env.reset()

        def initialize_placeholders(nlstm=128, **kwargs):
            return np.zeros((args.num_env or 1, 2 * nlstm)), np.zeros((1))

        state, dones = initialize_placeholders(**extra_args)
        while True:
            actions, _, state, _ = model.step(obs, S=state, M=dones)
            obs, _, done, _ = env.step(actions)
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                obs = env.reset()
        env.close()
    return model
def instant_impulse(variant):
    env_name = variant['env_name']
    env = get_env_from_name(env_name)
    env_params = variant['env_params']
    eval_params = variant['eval_params']
    policy_params = variant['alg_params']
    policy_params.update({
        's_bound': env.observation_space,
        'a_bound': env.action_space,
    })
    build_func = get_policy(variant['algorithm_name'])
    if 'Fetch' in env_name or 'Hand' in env_name:
        s_dim = env.observation_space.spaces['observation'].shape[0] \
            + env.observation_space.spaces['achieved_goal'].shape[0] \
            + env.observation_space.spaces['desired_goal'].shape[0]
    else:
        s_dim = env.observation_space.shape[0]
    a_dim = env.action_space.shape[0]
    # d_dim = env_params['disturbance dim']
    policy = build_func(a_dim, s_dim, policy_params)
    # disturber = Disturber(d_dim, s_dim, disturber_params)
    log_path = variant['log_path'] + '/eval/safety_eval'
    variant['eval_params'].update({'magnitude': 0})
    logger.configure(dir=log_path, format_strs=['csv'])
    for magnitude in eval_params['magnitude_range']:
        variant['eval_params']['magnitude'] = magnitude
        diagnostic_dict = evaluation(variant, env, policy)
        string_to_print = ['magnitude', ':', str(magnitude), '|']
        [string_to_print.extend([key, ':', str(round(diagnostic_dict[key], 2)), '|'])
         for key in diagnostic_dict.keys()]
        print(''.join(string_to_print))
        logger.logkv('magnitude', magnitude)
        [logger.logkv(key, diagnostic_dict[key]) for key in diagnostic_dict.keys()]
        logger.dumpkvs()
def main():
    logger.configure('logs/shanghai')
    T = []
    for root, dirs, files in os.walk('data/Taxi_070220'):
        for csv in files[:5000]:
            if not csv.startswith('.'):
                df = pd.read_csv(os.path.join(root, csv), header=None)
                df = preprocess_df(df)
                T.append(Taxi(df))
    # draw_Hongqiao()
    logger.info('# long distance from Hongqiao:', np.sum([t.longs for t in T]))
    logger.info('# short distance from Hongqiao:', np.sum([t.shorts for t in T]))
    logger.info('# take customer to Hongqiao and stay:', np.sum([t.stays for t in T]))
    logger.info('# take customer to Hongqiao and leave:', np.sum([t.leaves for t in T]))
    logpath = config.get( "common", "logs" )
    pidfile = config.get( "common", "pidfile" )
except ConfigParser.Error, e:
    print >> sys.stderr, "Error: cannot parse config file"
    exit( 1 )

if not redises:
    print >> sys.stderr, "No redis instances are set in config."
    exit( 1 )

if len( redises ) != len( failovers ):
    print >> sys.stderr, "Incorrect config. Number of redises != number of failovers."
    exit( 1 )

try:
    logger.configure( logpath )
except Exception, e:
    print >> sys.stderr, "Error while configuring logger:", e
    exit( 1 )

failover_id = str( uuid.uuid1() )
process_start = time.time()
analyzer = PingsAnalyzer( failtime, len( failovers ) )

try:
    httpserver = HttpServer( port, analyzer )
except socket.error, e:
    print >> sys.stderr, "Error while starting http server:", e
    exit( 1 )
    # p_connector.commit()
    cursor.close()


if __name__ == '__main__':
    conf = json.load(open('conf/batchs.json'))

    # Command line args
    # __doc__ contains the module docstring
    arguments = docopt(__doc__, version=conf['version'])
    if arguments['--debug']:
        conf['log']['level'] = 'DEBUG'

    configure(conf['log']['level_values'][conf['log']['level']],
              conf['log']['dir'],
              conf['log']['filename'],
              conf['log']['max_filesize'],
              conf['log']['max_files'])

    # PostgreSQL setup
    try:
        # Connection loading
        logger.debug("dbname='{db}' user='******' host='{host}' password='******'".format(
            db=conf['postgresql']['credentials']['db'],
            user=conf['postgresql']['credentials']['user'],
            host=conf['postgresql']['host'],
            passw=conf['postgresql']['credentials']['password']
        ))
        connector = psycopg2.connect("dbname='{db}' user='******' host='{host}' password='******'".format(
            db=conf['postgresql']['credentials']['db'],
            user=conf['postgresql']['credentials']['user'],
            host=conf['postgresql']['host'],
def run_import(type_doc=None, source_file=None):
    conf = json.load(open('./init-conf.json'))

    # Command line args
    arguments = docopt(__doc__, version=conf['version'])

    configure(conf['log']['level_values'][conf['log']['level']],
              conf['log']['dir'],
              conf['log']['filename'],
              conf['log']['max_filesize'],
              conf['log']['max_files'])

    #
    # Mapping creation
    #
    es_mappings = json.load(open('data/es.mappings.json'))

    # Connection to the business ES cluster
    try:
        param = [{'host': conf['connectors']['elasticsearch']['host'],
                  'port': conf['connectors']['elasticsearch']['port']}]
        es = Elasticsearch(param)
        logger.info('Connected to ES Server: %s', json.dumps(param))
    except Exception as e:
        logger.error('Connection failed to ES Server : %s', json.dumps(param))
        logger.error(e)

    # Create the target business ES index if it does not already exist
    index = conf['connectors']['elasticsearch']['index']
    if not es.indices.exists(index):
        logger.debug("Index %s does not exist: creating it", index)
        body_create_settings = {
            "settings": {
                "index": {
                    "number_of_shards": conf['connectors']['elasticsearch']['number_of_shards'],
                    "number_of_replicas": conf['connectors']['elasticsearch']['number_of_replicas']
                },
                "analysis": {
                    "analyzer": {
                        "lower_keyword": {
                            "type": "custom",
                            "tokenizer": "keyword",
                            "filter": "lowercase"
                        }
                    }
                }
            }
        }
        es.indices.create(index, body=body_create_settings)
        # Wait to make sure the index is created before continuing
        time.sleep(2)

        # Create the ES type mappings
        for type_es, properties in es_mappings['georequetes'].items():
            logger.debug("Creating the mapping for doc type %s", type_es)
            es.indices.put_mapping(index=index, doc_type=type_es, body=properties)
            time.sleep(2)

    #
    # Initial data import
    #

    # Swallow object for the data transformation
    swal = Swallow()

    # Try to get the parameters from the command line arguments
    type_doc = arguments['--type_doc'] if not type_doc else type_doc
    source_file = arguments['--source_file'] if not source_file else ('./upload/' + source_file)

    if arguments['--update']:
        if type_doc in ['referentiel_activites', 'referentiel_communes', 'communes', 'activites_connexes']:
            logger.debug("Deleting documents of type %s", type_doc)
            es.indices.delete_mapping(conf['connectors']['elasticsearch']['index'], type_doc)
            time.sleep(1)
            es.indices.put_mapping(index=conf['connectors']['elasticsearch']['index'],
                                   doc_type=type_doc,
                                   body=es_mappings['georequetes'][type_doc])
            time.sleep(1)

    if arguments['--init']:
        try:
            logger.debug("Deleting documents of type %s", type_doc)
            es.indices.delete_mapping(conf['connectors']['elasticsearch']['index'], type_doc)
            time.sleep(1)
        except TransportError as e:
            logger.info("Document type %s does not exist on index %s",
                        type_doc, conf['connectors']['elasticsearch']['index'])
            pass
        try:
            es.indices.put_mapping(index=conf['connectors']['elasticsearch']['index'],
                                   doc_type=type_doc,
                                   body=es_mappings['georequetes'][type_doc])
            time.sleep(1)
        except KeyError as e:
            logger.info("No custom mapping was specified for document type %s: automatic mapping.", type_doc)
            pass

    # Read from a file
    if type_doc in ['communes', 'departements', 'regions']:
        reader = JsonFileio()
        swal.set_reader(reader, p_file=source_file)
    elif type_doc in ['communes_pj']:
        reader = CSVio()
        swal.set_reader(reader, p_file=source_file, p_delimiter='|')
    elif type_doc in ['requetes']:
        reader = ESio(conf['connectors']['elasticsearch']['host'],
                      conf['connectors']['elasticsearch']['port'],
                      conf['connectors']['elasticsearch']['bulk_size'])
        swal.set_reader(reader, p_index='syn_es_data_geo')

    # Write to Elasticsearch
    writer = ESio(conf['connectors']['elasticsearch']['host'],
                  conf['connectors']['elasticsearch']['port'],
                  conf['connectors']['elasticsearch']['bulk_size'])
    swal.set_writer(writer, p_index=conf['connectors']['elasticsearch']['index'], p_timeout=30)

    # Transform the data with this function
    swal.set_process(file_to_elasticsearch,
                     p_type=type_doc,
                     p_es_conn=es,
                     p_es_index=conf['connectors']['elasticsearch']['index'],
                     p_arguments=arguments)

    if arguments['--init']:
        logger.debug("Initialization operation")
    elif arguments['--update']:
        logger.debug("Update operation")
    else:
        logger.error("Operation type not defined")

    logger.debug("Indexing on %s documents of type %s",
                 conf['connectors']['elasticsearch']['index'], type_doc)
    swal.run(1)
    logger.debug("Operation finished for document type %s ", type_doc)
    env.close()
    logger.info('total runtime: {}s'.format(time.time() - start_time))


def parse_args():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env-id', type=str, default='HalfCheetah-v1')
    boolean_flag(parser, 'normalize-observations', default=True)
    parser.add_argument('--seed', help='RNG seed', type=int, default=9876)
    parser.add_argument('--batch-size', type=int, default=64)
    parser.add_argument('--actor-lr', type=float, default=1e-4)
    parser.add_argument('--critic-lr', type=float, default=1e-3)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--nb-epochs', type=int, default=500)  # with default settings, perform 1M steps total
    parser.add_argument('--nb-epoch-cycles', type=int, default=20)
    parser.add_argument('--nb-train-steps', type=int, default=50)
    parser.add_argument('--nb-rollout-steps', type=int, default=100)
    parser.add_argument('--noise-type', type=str, default='ou_0.2')  # choices are ou_xx, normal_xx, none
    args = parser.parse_args()
    dict_args = vars(args)
    return dict_args


if __name__ == '__main__':
    args = parse_args()
    logger.configure(dir='/home/nichengzhuo/ddpg_exps_new/results/base.no_mpi.modify/')
    # Run actual script.
    run(**args)
import guv
guv.monkey_patch()
import guv.wsgi

import logger

logger.configure()


def app(environ, start_response):
    """
    This is a very basic WSGI app useful for testing the performance of guv and
    guv.wsgi without the overhead of a framework such as Flask. However, it can
    just as easily be any other WSGI app callable object, such as a Flask or
    Bottle app.
    """
    status = '200 OK'
    output = [b'Hello World!']
    content_length = str(len(b''.join(output)))
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', content_length)]
    start_response(status, response_headers)
    return output


if __name__ == '__main__':
    server_sock = guv.listen(('0.0.0.0', 8001))
    guv.wsgi.serve(server_sock, app)