def play():
    """Drive a rendered LunarLander-v2 environment with the actor's policy.

    Builds the environment and an ``Actor``, calls ``actor.load()``
    (presumably restoring previously saved parameters -- confirm against
    ``Actor``), then loops: render, ask the actor for an action, step the
    environment, and reset it whenever an episode ends.  Nothing is stored
    or trained here.

    Returns:
        0 -- but see the note at the end of the function.
    """
    print("play")

    env = gym.make('LunarLander-v2')
    state = env.reset()
    actor = Actor(env.action_space, env.observation_space)
    actor.load()

    while True:
        env.render()

        # The actor is handed the observation with a leading axis added.
        # Only the first of its three return values is used for playback.
        action, _action_matrix, _prob = actor.predict(state[np.newaxis, :])
        observation, _reward, finished, _info = env.step(action)

        # Start a new episode when this one is over, otherwise carry on
        # from the observation we just received.
        state = env.reset() if finished else observation

    # NOTE(review): unreachable as written -- the loop above never exits.
    # The original layout was lost, so confirm this was not meant to sit
    # inside the episode-finished branch.
    return 0
def get_actions(self, fuid=None):
    """Collect the actions this player can currently perform.

    Args:
        fuid: Optional dotted action identifier.  When given it is split
            on '.', its second field is converted to ``int`` and its third
            field is used as the action name; the parsed list is passed to
            ``Util.match_id`` to filter which actions are produced.  When
            omitted, ``None`` is passed as both the identifier and the name.

    Returns:
        A dict mapping action uid to action object.  It is empty when the
        player's ``ap`` is zero or negative.

    Raises:
        ValueError / IndexError: if ``fuid`` is given but its second field
            is not an integer or it has fewer than three fields.
    """
    available = {}

    # With no action points left (zero or below) nothing can be done.
    if self.ap <= 0:
        return available

    if fuid is None:
        action_id = name = None
    else:
        parts = fuid.split('.')
        parts[1] = int(parts[1])
        action_id, name = parts, parts[2]

    # --- Actions on the player itself ---------------------------------

    # Debug action: say "Boo!"
    # FIXME: Remove this
    if Util.match_id(action_id, self, "sayboo"):
        boo_uid = Action.make_id(self, "sayboo")
        available[boo_uid] = Action(
            boo_uid,
            self,
            caption="Say Boo",
            action=lambda d: self.say_boo(),
            group="player",
        )

    # Switch the held item.
    if Util.match_id(action_id, self, "changeitem"):
        change_uid = Action.make_id(self, "changeitem")
        change = ActionChangeItem(
            change_uid,
            self,
            caption="Change",
            cost=Cost(),
            group="inventory",
            action=lambda d: self.change_item_action(d),
            html="Use item <input id='%s_id' size='3' />. " % change_uid,
            parameters=['id'],
        )
        available[change_uid] = change
        # Append the action's own button markup to its input field.
        change.html += change.make_button_for()

    # --- Actions offered by the held item ------------------------------
    # No action name is given to match_id here, so any action matches.
    held = self.held_item()
    if held is not None and Util.match_id(action_id, held):
        held.external_actions(available, self, name)

    # --- Actions offered by worn items ---------------------------------
    # FIXME: Fill in here

    # --- Actions offered by the current location -----------------------
    here = self.loc()
    if Util.match_id(action_id, here):
        here.external_actions(available, self, name)

    # --- Actions offered by actors at this location --------------------
    for actor_id in self.loc().actor_ids():
        other = Actor.load(actor_id)
        if Util.match_id(action_id, other):
            other.external_actions(available, self, name)

    # --- Actions offered by nearby actors ------------------------------
    # FIXME: Fill in here

    return available
def actor_handler(req, player, target, components):
    """Serve a request about the actor identified by ``target``.

    Exactly one URL component is accepted; it selects what is served:

    * ``actions``   -- GET lists the actor's actions; POST performs the
                       action named by the ``action`` field of the parsed
                       input and then saves game state.
    * ``desc``      -- GET renders the actor's description.
    * ``inventory`` -- GET renders the inventory table.
    * ``equipment`` -- GET renders the equipment table.
    * ``log``       -- GET renders the actor's messages since the value of
                       the ``X-WoR-Messages-Since`` request header, or since
                       the actor's ``last_action`` (0 if it has none).

    Args:
        req: the request object; output is written to it.
        player: the requesting player, whose context is used for rendering.
        target: ID of the actor to load.
        components: remaining URL path components.

    Returns:
        ``apache.OK`` on success, ``apache.HTTP_NOT_FOUND`` for a wrong
        number of components or an unknown component, and
        ``apache.HTTP_METHOD_NOT_ALLOWED`` for an unsupported HTTP method.
    """
    # Precisely one component must be present in the request URL.
    if len(components) != 1:
        return apache.HTTP_NOT_FOUND

    req.content_type = "text/plain"
    actor = Actor.load(target)
    context = player.get_context()

    section = components[0]
    method = req.method

    # Actions are dealt with first because they are the only resource
    # that accepts POST.
    if section == 'actions':
        if method == 'GET':
            # One rendered record per action, each followed by a "-" line.
            for act in actor.get_actions().values():
                Util.render_info(act.context_get(context), req)
                req.write("-\n")
            return apache.OK

        if method == 'POST':
            data = Util.parse_input(req)
            if 'action' in data:
                actor.perform_action(data['action'], data)
            # Save any game state that might have changed.
            # NOTE(review): the original layout was lost; confirm the save
            # belongs at this level and not inside the branch above.
            GameUtil.save()
            return apache.OK

        # Neither GET nor POST.
        return apache.HTTP_METHOD_NOT_ALLOWED

    # Everything below is read-only.
    if method != 'GET':
        return apache.HTTP_METHOD_NOT_ALLOWED

    log.debug("Actor handler: requested " + str(components))

    if section == 'desc':
        Util.render_info(actor.context_get(context), req)
    elif section == 'inventory':
        Util.render_table(actor.inventory.context_get_equip(context), req)
    elif section == 'equipment':
        Util.render_table(actor.equipment.context_get_equip(context), req)
    elif section == 'log':
        headers = req.headers_in
        # The header value is passed through as received; the fallback is
        # whatever the actor stores in last_action.
        since = (headers['X-WoR-Messages-Since']
                 if 'X-WoR-Messages-Since' in headers
                 else getattr(actor, 'last_action', 0))
        Util.render_table(actor.get_messages(since), req)
    else:
        return apache.HTTP_NOT_FOUND

    return apache.OK
def train():
    """Train the actor and critic on LunarLander-v2 until interrupted.

    Each environment step is recorded in a ``ReplayMemory``.  Whenever the
    memory size is a multiple of 128, or an episode ends, everything in the
    memory is used for 10 critic updates followed by 10 actor updates and
    the memory is cleared.  At the end of every episode the episode's total
    reward is written as a summary under ``./log``; every 25th episode the
    actor and critic are saved.

    The loop has no exit condition; it stops only when an exception (for
    example ``KeyboardInterrupt``) propagates, at which point the summary
    writer, the summary session and the environment are closed.
    """
    discount = 0.7            # weight on the critic's next-state estimate
    train_trigger = 128       # train when the memory size hits a multiple
    updates_per_round = 10    # critic updates, then actor updates
    save_interval = 25        # episodes between saves

    env = gym.make('LunarLander-v2')
    state = env.reset()
    actor = Actor(env.action_space, env.observation_space)
    critic = Critic(env.action_space, env.observation_space)
    actor.load()
    critic.load()
    memory = ReplayMemory()
    summary_ops, summary_vars = build_summaries()

    # A single session serves every summary evaluation.  It is deliberately
    # not installed as the default session (no ``with`` block) so that
    # whatever session Actor/Critic use internally is left untouched.
    summary_session = tf.Session()
    writer = tf.summary.FileWriter("./log", summary_session.graph)

    episode_reward = 0
    step = 1  # episode counter; incremented each time an episode ends

    try:
        while True:
            # The actor is handed the observation with a leading axis added.
            action, action_matrix, prob = actor.predict(state[np.newaxis, :])
            next_state, reward, done, _info = env.step(action)
            memory.add(state, action_matrix, reward, done, next_state, prob)
            state = next_state
            episode_reward += reward

            if memory.size() % train_trigger == 0 or done:
                (state_b, action_matrix_b, reward_b, _done_b,
                 next_state_b, prob_b) = memory.miniAll()

                # Give rewards a trailing axis before combining them with
                # the critic output (assumes critic.predict returns one
                # column per sample -- TODO confirm).
                reward_b = reward_b[:, np.newaxis]

                # NOTE(review): the done flags are not used here, so the
                # target also bootstraps from the critic's estimate on
                # terminal transitions.  Left unchanged; confirm whether
                # that is intended.
                target_value = reward_b + critic.predict(next_state_b) * discount
                current_value = critic.predict(state_b)

                for _ in range(updates_per_round):
                    critic.train(state_b, target_value)
                for _ in range(updates_per_round):
                    actor.train(state_b, current_value, target_value,
                                action_matrix_b, prob_b)
                memory.clear()

            if done:
                summary_str = summary_session.run(
                    summary_ops,
                    feed_dict={summary_vars[0]: episode_reward})
                writer.add_summary(summary_str, step)
                writer.flush()

                state = env.reset()
                episode_reward = 0
                step += 1
                if step % save_interval == 0:
                    actor.save()
                    critic.save()
    finally:
        # Release what this function opened, however the loop was left.
        writer.close()
        summary_session.close()
        env.close()