def main():
    # tornado.options.parse_command_line()
    http_server = tornado.httpserver.HTTPServer(Application())
    port = 8000 if not get_env("port") else int(get_env("port"))
    print("server is running on port {0}".format(port))
    http_server.listen(port)
    tornado.ioloop.IOLoop.current().start()
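# The get_env("port") helper used above (and in the API-client snippets below) is
# not defined in these excerpts. A minimal sketch, assuming it simply reads a value
# from the process environment and falls back to an empty string when unset:
import os

def get_env(name, default=""):
    # Hypothetical helper: look up a configuration value in the environment.
    return os.environ.get(name, default)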
def __init__(self):
    url = "https://accounts.spotify.com/api/token"
    client_id = get_env("spotify_client_id")
    client_secret = get_env("spotify_client_secret")
    self.token = ""
    resp = requests.post(url,
                         data={"grant_type": "client_credentials"},
                         auth=(client_id, client_secret))
    if resp.ok:
        body = resp.json()
        self.token = body["access_token"]
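# Illustrative follow-up (not part of the original snippet): the client-credentials
# token obtained above would typically be sent as a Bearer token on later Web API
# calls. The `search` method name and its parameters are assumptions; the endpoint
# follows Spotify's public search API.
def search(self, query, kind="track"):
    headers = {"Authorization": "Bearer {0}".format(self.token)}
    resp = requests.get("https://api.spotify.com/v1/search",
                        params={"q": query, "type": kind},
                        headers=headers)
    return resp.json() if resp.ok else {}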
def init_components(args, unknown_args) -> Tuple[Env, Agent]:
    # Initialize environment
    env = get_env(args.env, args.num_envs, **unknown_args)

    # Get model class
    if args.model is not None:
        model_cls = model_registry.get(args.model)
    else:
        env_type = _get_gym_env_type(args.env)
        if env_type == 'atari':
            model_cls = model_registry.get('qcnn')
        elif env_type == 'classic_control':
            model_cls = model_registry.get('qmlp')
        else:
            raise NotImplementedError(
                f'No default model for environment: {args.env!r}')

    # Initialize agent
    agent_cls = agent_registry.get(args.alg)
    # If it is inconvenient to build the env on the learner side, replace
    # env.get_observation_space() and env.get_action_space() with concrete values.
    agent = agent_cls(model_cls, env.get_observation_space(), env.get_action_space(),
                      args.agent_config, **unknown_args)

    return env, agent
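# A hedged usage sketch (not from the original source): how init_components might be
# invoked. The flag names mirror the attributes read above, but the real parser lives
# elsewhere in the project. Unknown CLI flags are folded into a dict because the
# function expands them with **unknown_args.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--env', default='CartPole-v1')
    parser.add_argument('--alg', default='dqn')
    parser.add_argument('--model', default=None)
    parser.add_argument('--num_envs', type=int, default=1)
    parser.add_argument('--agent_config', default=None)
    args, extra = parser.parse_known_args()

    # e.g. ['--seed', '0'] -> {'seed': '0'}
    unknown_args = dict(zip([k.lstrip('-') for k in extra[::2]], extra[1::2]))

    env, agent = init_components(args, unknown_args)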
                      intra_op_parallelism_threads=2)

if args.job == 'worker':
    # worker
    if args.trainer is None:
        args.trainer = args.agent.split('.')[0]
    if args.trainer == 'adv_ac':
        trainer_class = A3CTrainer
    elif args.trainer == 'q':
        trainer_class = AsyncQLearningTrainer

    # additional trainer arguments
    trainer_parser = trainer_class.parser()
    trainer_args = trainer_parser.parse_args(extra)

    server = tf.train.Server(cluster, job_name='worker',
                             task_index=args.task_index, config=config)
    build_agent = get_agent_builder(args.agent)
    env = get_env(args.env_id)
    run(args.task_index, args.log_dir, trainer_args, server, env,
        trainer_class, build_agent)
else:
    # parameter server
    server = tf.train.Server(cluster, job_name='ps',
                             task_index=args.task_index, config=config)
    server.join()
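# Hedged sketch (not in the original excerpt): the `cluster` and `config` objects
# referenced above would typically be built along these lines before dispatching on
# args.job. Host:port addresses here are placeholders.
import tensorflow as tf

cluster = tf.train.ClusterSpec({
    'ps': ['localhost:2222'],
    'worker': ['localhost:2223', 'localhost:2224'],
})
config = tf.ConfigProto(intra_op_parallelism_threads=2)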
def __init__(self):
    self.username = get_env("vision_recognize_user")
    self.password = get_env("vision_recognize_pass")
def __init__(self):
    self.api_key = get_env("echonest_api_key")
Requires a minor code change: each agent returns $Q(\cdot, \mathbf{a})$ instead of
immediately returning the action. This is needed for the VCG mechanism, which acts
somewhat like a Mixer. Then the `pick_action_from_Q(.)` function returns the action,
just as `pick_action(.)` did before (see the sketch after this block).

Notes:
- asdf
"""

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'device is: {device}')

env = get_env('ipd')
# n_actions = 2
n_local_actions = 2
n_actions = np.prod([space.n for space in env.action_space])
n_actions = 2
obs_size = 1

BATCH_SIZE = 128
PLT_PERIOD = 100  # faster than 1
MAX_EPISODES = 13000
MEM_SIZE = 256  # 300
OPTIM_PERIOD = 1
GAMMA = 0.999
TARGET_UPDATE = 10  # in epochs
EPS_START = 0.9
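# A minimal sketch of the change described in the docstring, assuming the usual
# epsilon-greedy DQN setup. `policy_net`, `eps_threshold`, and the tensor shapes are
# assumptions for illustration, not part of the original code.
import random

def get_Q(state):
    # Each agent now returns its full Q-vector Q(., a) over its local actions
    # instead of committing to an action itself.
    with torch.no_grad():
        return policy_net(state)  # shape: [n_local_actions]

def pick_action_from_Q(q_values, eps_threshold=0.05):
    # The VCG-style mixer then turns Q-values into an action,
    # mirroring what pick_action(.) used to do directly.
    if random.random() < eps_threshold:
        return torch.tensor([[random.randrange(q_values.shape[-1])]],
                            dtype=torch.long, device=device)
    return q_values.argmax().view(1, 1)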
def __init__(self):
    self.client_id = get_env("gracenote_client_id")
    self.user_id = get_env("gracenote_user_id")
def __init__(self, api_key=""): self.api_key = api_key or get_env("musixmatch_api_key")
def __init__(self):
    self.HOST = "https://pl.t.petitlyrics.com/mh/1/lyrics/list.xml"
    self.auth_key = get_env("petitlyrics_auth_key")
def __init__(self):
    self.key = get_env("yamaha_key")
    self.ver = get_env("yamaha_ver")
def __init__(self):
    self.columns = self.FEATURES
    self.key = get_env("rekognition_key")
    self.secret = get_env("rekognition_secret")
    self.limit = 3
@staticmethod
def parser():
    return argparse.ArgumentParser()

def __init__(self, env, build_model, args):
    ''' init with environment and arguments parsed by parser '''
    raise NotImplementedError

def setup(self):
    ''' setup rollout provider, queue, etc. '''
    raise NotImplementedError

def train(self, sess):
    ''' consume a (partial) rollout and update the model '''
    raise NotImplementedError


if __name__ == '__main__':
    from envs import test_env, get_env

    env = get_env('gym.BipedalWalker-v2')
    agent = RandomAgent(env.spec)
    ob = env.reset()
    while True:
        action = agent.act(ob)
        ob, reward, done = env.step(action)
        env.render()
        if done:
            ob = env.reset()
            print('new episode')
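# RandomAgent is used in the smoke test above but not defined in this excerpt.
# A minimal sketch, assuming env.spec exposes an action space with a sample() method:
class RandomAgent(object):
    # Hypothetical test agent: ignores observations and samples uniformly
    # from the action space, just to exercise the env loop above.
    def __init__(self, spec):
        self.action_space = spec.action_space

    def act(self, ob):
        return self.action_space.sample()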
def run_one_actor(index, args, unknown_args, actor_status):
    import tensorflow.compat.v1 as tf
    from tensorflow.keras.backend import set_session

    # Set 'allow_growth'
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))

    # Connect to learner
    context = zmq.Context()
    context.linger = 0  # For removing linger behavior
    socket = context.socket(zmq.REQ)
    socket.connect(f'tcp://{args.ip}:{args.data_port}')

    # Initialize environment and model instance
    env = get_env(args.env, args.num_envs, **unknown_args)
    model = get_model(env, args)

    # Configure logging only in one process
    if index == 0:
        logger.configure(str(args.log_path))
    else:
        logger.configure(str(args.log_path), format_strs=[])

    # Initialize values
    model_id = -1
    episode_infos = deque(maxlen=100)
    num_episode = 0
    state = env.reset()
    nupdates = args.num_steps // args.max_steps_per_update
    model_init_flag = 0

    for update in range(1, nupdates + 1):
        # Update weights
        new_weights, model_id = find_new_weights(model_id, args.ckpt_path)
        if new_weights is not None:
            model.set_weights(new_weights)
            model_init_flag = 1
        elif model_init_flag == 0:
            continue

        # Collect data
        mb_states, mb_actions, mb_rewards, mb_dones, mb_extras = [], [], [], [], []
        start_time = time.time()
        for _ in range(args.max_steps_per_update):
            mb_states.append(state)

            # Sample action
            action, value, neglogp = model.forward(state)
            extra_data = {'value': value, 'neglogp': neglogp}

            state, reward, done, info = env.step(action)

            mb_actions.append(action)
            mb_rewards.append(reward)
            mb_dones.append(done)
            mb_extras.append(extra_data)

            for info_i in info:
                maybeepinfo = info_i.get('episode')
                if maybeepinfo:
                    episode_infos.append(maybeepinfo)
                    num_episode += 1

        mb_states = np.asarray(mb_states, dtype=state.dtype)
        mb_rewards = np.asarray(mb_rewards, dtype=np.float32)
        mb_actions = np.asarray(mb_actions)
        mb_dones = np.asarray(mb_dones, dtype=np.bool)

        # Adjust data format and send to learner
        data = prepare_training_data(
            model, [mb_states, mb_actions, mb_rewards, mb_dones, state, mb_extras])
        socket.send(serialize(data).to_buffer())
        socket.recv()
        send_data_interval = time.time() - start_time

        # Log information
        logger.record_tabular("steps", update * args.max_steps_per_update)
        logger.record_tabular("episodes", num_episode)
        logger.record_tabular(
            "mean 100 episode reward",
            round(np.mean([epinfo['reward'] for epinfo in episode_infos]), 2))
        logger.record_tabular(
            "mean 100 episode length",
            round(np.mean([epinfo['length'] for epinfo in episode_infos]), 2))
        logger.record_tabular("send data interval", send_data_interval)
        logger.record_tabular("send data fps",
                              args.max_steps_per_update // send_data_interval)
        logger.record_tabular("total steps", nupdates * args.max_steps_per_update)
        logger.dump_tabular()

    actor_status[index] = 1
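# find_new_weights is called above but not shown in this excerpt. A hedged sketch of
# one plausible implementation, assuming the learner drops pickled weight files named
# by an increasing model id (e.g. '17.pkl') into ckpt_path; the naming scheme and
# file format are assumptions, not the project's actual protocol.
import os
import pickle

def find_new_weights(current_model_id, ckpt_path):
    candidates = [int(f.split('.')[0]) for f in os.listdir(ckpt_path)
                  if f.endswith('.pkl') and f.split('.')[0].isdigit()]
    if not candidates:
        return None, current_model_id
    latest = max(candidates)
    if latest <= current_model_id:
        # Nothing newer than what the actor already has
        return None, current_model_id
    with open(os.path.join(ckpt_path, f'{latest}.pkl'), 'rb') as f:
        return pickle.load(f), latest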
xx = list(range(epochs))
xx = [x / epochs for x in xx]
eps = [math.exp(-1. * x / decay) for x in xx]
eps = [ee * (eps_start - eps_end) + eps_end for ee in eps]

plt.figure(1)
plt.clf()
plt.plot(xx, eps)
plt.show()

# debug_decay()  # EPS_START|END not defined yet

""" Global IPD setup """
env_name = 'global_ipd'
env = get_env(env_name)
n_actions = env.action_space.n
obs_size = env.observation_space.shape[0]
print(f'no_actions: {n_actions}, obs_size: {obs_size}')

BATCH_SIZE = 128
GAMMA = 0.999
EPS_START = 0.9
EPS_END = 0.1
DECAY = 1 / 4
LR = 0.005
MAX_EPISODES = 5000
OPTIM_PERIOD = 1  # in steps
TARGET_UPDATE = 10  # in epochs
PLT_PERIOD = 10
MEM_SIZE = 10000
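# For reference, the decay plotted above follows
# eps(x) = eps_end + (eps_start - eps_end) * exp(-x / decay), with x the fraction of
# training elapsed. A small helper capturing this (the function name and defaults,
# taken from the hyperparameters above, are illustrative only):
import math

def epsilon_at(frac_done, eps_start=0.9, eps_end=0.1, decay=0.25):
    return eps_end + (eps_start - eps_end) * math.exp(-frac_done / decay)

# e.g. epsilon_at(0.0) == 0.9, epsilon_at(1.0) ≈ 0.1147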
def __init__(self):
    self.HOST = "https://hackathon-api.livefans.jp/ver010000"
    self.client_id = get_env("livefan_client_id")