def deep_q_learning(sess,
                    agent_host,
                    q_estimator,
                    target_estimator,
                    state_processor,
                    num_episodes,
                    experiment_dir,
                    replay_memory_size=50000,
                    replay_memory_init_size=5000,
                    update_target_estimator_every=1000,
                    discount_factor=0.99,
                    epsilon_start=1.0,
                    epsilon_end=0.1,
                    epsilon_decay_steps=8000,
                    batch_size=32,
                    record_video_every=100):
    """
    Q-Learning for off-policy TD control using function approximation,
    driven through a Malmo ``agent_host`` instead of a gym environment.

    Follows an epsilon-greedy policy while training ``q_estimator``; the
    targets use the Double DQN form (actions picked by ``q_estimator``,
    valued by ``target_estimator``).

    This function is a generator: it yields after every episode.

    NOTE(review): this copy of the file had lost its line structure and a
    few expressions (marked inline below). Block nesting was reconstructed
    from syntax and should be confirmed against the original file.

    Args:
        sess: Tensorflow Session object
        agent_host: Malmo agent host used to start missions, send commands
            and poll the world state. Its 'mission_file' string argument is
            used as the directory holding the "Maze<N>.xml" missions.
        q_estimator: Estimator object used for the q values
        target_estimator: Estimator object used for the targets
        state_processor: A StateProcessor object
        num_episodes: Number of episodes to run for
        experiment_dir: Directory under which "checkpoints" and "monitor"
            sub-directories are created
        replay_memory_size: Size of the replay memory
        replay_memory_init_size: Number of experiences to sample when
            initializing the replay memory
        update_target_estimator_every: Copy parameters from the Q estimator
            to the target estimator every N steps
        discount_factor: Gamma discount factor
        epsilon_start: Chance to sample a random action when taking an
            action. Epsilon is decayed over time and this is the start value
        epsilon_end: The final minimum value of epsilon after decaying is done
        epsilon_decay_steps: Number of steps to decay epsilon over
        batch_size: Size of batches to sample from the replay memory
        record_video_every: Only referenced by the commented-out gym
            Monitor code below; currently has no effect.

    Yields:
        A tuple ``(total_t, EpisodeStats)`` after each episode, where the
        stats arrays are sliced to the episodes run so far.

    Returns:
        The full EpisodeStats object (as the generator's return value).
    """
    # ---- Initial mission setup: always starts with Maze0.xml ----
    mission_file = agent_host.getStringArgument('mission_file')
    mission_file = os.path.join(mission_file, "Maze0.xml")
    currentMission = mission_file
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        # NOTE(review): the right-hand side is missing in this copy; `f` is
        # never read anywhere, so presumably the file contents were read
        # here. Same gap at every "Loading mission" site below.
        mission_xml =
        my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission.removeAllCommandHandlers()
    my_mission.allowAllDiscreteMovementCommands()
    my_mission.setViewpoint(2)
    my_clients = MalmoPython.ClientPool()
    # NOTE(review): the client address is an empty string in this copy.
    my_clients.add(MalmoPython.ClientInfo(
        '', 10000))  # add Minecraft machines here as available
    # my_clients.add(MalmoPython.ClientInfo('', 10001))
    max_retries = 3
    agentID = 0
    expID = 'Deep_q_learning memory'

    Transition = namedtuple(
        "Transition", ["state", "action", "reward", "next_state", "done"])

    # The replay memory
    replay_memory = []

    # Keeps track of useful statistics
    stats = plotting.EpisodeStats(episode_lengths=np.zeros(num_episodes),
                                  episode_rewards=np.zeros(num_episodes))

    # Create directories for checkpoints and summaries
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    monitor_path = os.path.join(experiment_dir, "monitor")

    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    if not os.path.exists(monitor_path):
        os.makedirs(monitor_path)

    saver = tf.train.Saver()
    # Load a previous checkpoint if we find one
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint:
        print("Loading model checkpoint {}...\n".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)

    # NOTE(review): the initial value of the global step counter is missing
    # in this copy. total_t is used below as an integer index into
    # `epsilons` and is incremented once per training step.
    total_t =

    # The epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)

    # The policy we're following
    policy = make_epsilon_greedy_policy(q_estimator, len(actionSet))

    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "save_%s-rep" % (expID))

    # Try to start the mission a few times before giving up.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record,
                                    agentID, "%s" % (expID))
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2.5)

    # Block until the mission has begun, reporting any errors on the way.
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        print("Sleeping")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()
    agent_host.sendCommand("look -1")
    agent_host.sendCommand("look -1")
    print("Populating replay memory...")

    # Busy-wait until an observation other than the empty '{}' is available.
    while world_state.is_mission_running and all(
            e.text == '{}' for e in world_state.observations):
        print("Sleeping....")
        world_state = agent_host.peekWorldState()

    # Populate the replay memory with initial experience
    while world_state.number_of_observations_since_last_state <= 0 and world_state.is_mission_running:
        # print("Sleeping")
        time.sleep(0.1)
        world_state = agent_host.peekWorldState()

    state = gridProcess(
        world_state
    )  # MALMO ENVIRONMENT Grid world NEEDED HERE/ was env.reset()
    state = state_processor.process(sess, state)
    # The network input is the processed frame repeated 4 times on axis 2.
    state = np.stack([state] * 4, axis=2)

    for i in range(replay_memory_init_size):
        print("%s th replay memory" % i)
        # Pick a new random maze every 20 iterations, otherwise keep the
        # current one. The file is only loaded when a mission restart is
        # needed further down.
        mission_file = agent_host.getStringArgument('mission_file')
        if i % 20 == 0:
            mazeNum = randint(0, 4)
            mission_file = os.path.join(mission_file, "Maze%s.xml" % mazeNum)
            currentMission = mission_file
        else:
            mission_file = currentMission

        print("Mission File:", mission_file)
        action_probs = policy(sess, state, epsilons[min(total_t, epsilon_decay_steps - 1)])
        action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
        # next_state, reward, done, _ = env.step(actionSet[action])
        # Malmo send command for the action
        # print("Sending command: ", actionSet[action])
        agent_host.sendCommand(actionSet[action])

        # Spin until the video frame counter changes or the mission ends.
        world_state = agent_host.peekWorldState()
        num_frames_seen = world_state.number_of_video_frames_since_last_state

        while world_state.is_mission_running and world_state.number_of_video_frames_since_last_state == num_frames_seen:
            world_state = agent_host.peekWorldState()

        done = not world_state.is_mission_running

        if world_state.is_mission_running:
            # Getting the reward from taking a step
            while world_state.number_of_rewards_since_last_state <= 0:
                time.sleep(0.1)
                world_state = agent_host.peekWorldState()
            reward = world_state.rewards[-1].getValue()
            print("1)Just received the reward: %s on action: %s " % (reward, actionSet[action]))

            while world_state.is_mission_running and all(
                    e.text == '{}' for e in world_state.observations):
                world_state = agent_host.peekWorldState()
            # world_state = agent_host.getWorldState()

            if not world_state.is_mission_running:
                # The mission ended while waiting for the observation:
                # store a terminal transition, then restart the mission.
                # reward = 0
                next_state = state
                done = not world_state.is_mission_running
                print("1)Action: %s, Reward: %s, Done: %s" % (actionSet[action], reward, done))
                replay_memory.append(
                    Transition(state, action, reward, next_state, done))

                # restart mission for next round of memory generation
                with open(mission_file, 'r') as f:
                    print("Loading mission from %s" % mission_file)
                    mission_xml =  # NOTE(review): right-hand side missing in this copy
                    my_mission = MalmoPython.MissionSpec(mission_xml, True)
                my_mission.removeAllCommandHandlers()
                my_mission.allowAllDiscreteMovementCommands()
                my_mission.setViewpoint(2)

                for retry in range(max_retries):
                    try:
                        agent_host.startMission(my_mission, my_clients, my_mission_record,
                                                agentID, "%s" % (expID))
                        break
                    except RuntimeError as e:
                        if retry == max_retries - 1:
                            print("Error starting mission:", e)
                            exit(1)
                        else:
                            time.sleep(2.5)

                world_state = agent_host.getWorldState()
                while not world_state.has_mission_begun:
                    print(".", end="")
                    time.sleep(0.1)
                    world_state = agent_host.getWorldState()
                agent_host.sendCommand("look -1")
                agent_host.sendCommand("look -1")
                while world_state.is_mission_running and all(
                        e.text == '{}' for e in world_state.observations):
                    world_state = agent_host.peekWorldState()
                state = gridProcess(
                    world_state)  # Malmo GetworldState? / env.reset()
                state = state_processor.process(sess, state)
                state = np.stack([state] * 4, axis=2)
            else:
                # Normal step: drop the oldest of the stacked frames and
                # append the new one along axis 2.
                next_state = gridProcess(world_state)
                next_state = state_processor.process(sess, next_state)
                next_state = np.append(state[:, :, 1:],
                                       np.expand_dims(next_state, 2),
                                       axis=2)
                done = not world_state.is_mission_running
                print("1)Action: %s, Reward: %s, Done: %s" % (actionSet[action], reward, done))
                replay_memory.append(
                    Transition(state, action, reward, next_state, done))
                state = next_state
        else:
            # The mission ended right after the command was sent.
            done = not world_state.is_mission_running
            if len(world_state.rewards) > 0:
                reward = world_state.rewards[-1].getValue()
            else:
                reward = 0
            print("2)Just received the reward: %s on action: %s " % (reward, actionSet[action]))
            next_state = state
            print("2)Action: %s, Reward: %s, Done: %s" % (actionSet[action], reward, done))
            replay_memory.append(
                Transition(state, action, reward, next_state, done))

            # restart mission for next round of memory generation
            with open(mission_file, 'r') as f:
                print("Loading mission from %s" % mission_file)
                mission_xml =  # NOTE(review): right-hand side missing in this copy
                my_mission = MalmoPython.MissionSpec(mission_xml, True)
            my_mission.removeAllCommandHandlers()
            my_mission.allowAllDiscreteMovementCommands()
            my_mission.setViewpoint(2)

            for retry in range(max_retries):
                try:
                    agent_host.startMission(my_mission, my_clients, my_mission_record,
                                            agentID, "%s" % (expID))
                    break
                except RuntimeError as e:
                    if retry == max_retries - 1:
                        print("Error starting mission:", e)
                        exit(1)
                    else:
                        time.sleep(2.5)

            world_state = agent_host.getWorldState()
            while not world_state.has_mission_begun:
                print(".", end="")
                time.sleep(0.1)
                world_state = agent_host.getWorldState()
            agent_host.sendCommand("look -1")
            agent_host.sendCommand("look -1")
            while world_state.is_mission_running and all(
                    e.text == '{}' for e in world_state.observations):
                world_state = agent_host.peekWorldState()
            state = gridProcess(
                world_state)  # Malmo GetworldState? / env.reset()
            state = state_processor.process(sess, state)
            state = np.stack([state] * 4, axis=2)

        # time.sleep(0.2)
    print("Finished populating memory")

    # Record videos
    # Use the gym env Monitor wrapper
    # env = Monitor(env,
    #               directory=monitor_path,
    #               resume=True,
    #               video_callable=lambda count: count % record_video_every ==0)

    # NEED TO RECORD THE VIDEO AND SAVE TO THE SPECIFIED DIRECTORY
    currentMission = mission_file

    for i_episode in range(num_episodes):
        print("%s-th episode" % i_episode)

        # NOTE(review): extent of this `if` is reconstructed; it is assumed
        # to cover the whole mission restart, so that episode 0 reuses the
        # mission left running by the replay-memory phase. Confirm.
        if i_episode != 0:
            mission_file = agent_host.getStringArgument('mission_file')
            if i_episode % 20 == 0:
                mazeNum = randint(0, 4)
                mission_file = os.path.join(mission_file, "Maze%s.xml" % mazeNum)
                currentMission = mission_file
            else:
                mission_file = currentMission

            with open(mission_file, 'r') as f:
                print("Loading mission from %s" % mission_file)
                mission_xml =  # NOTE(review): right-hand side missing in this copy
                my_mission = MalmoPython.MissionSpec(mission_xml, True)
            my_mission.removeAllCommandHandlers()
            my_mission.allowAllDiscreteMovementCommands()
            # my_mission.requestVideo(320, 240)
            my_mission.forceWorldReset()
            my_mission.setViewpoint(2)
            my_clients = MalmoPython.ClientPool()
            my_clients.add(MalmoPython.ClientInfo(
                '', 10000))  # add Minecraft machines here as available
            max_retries = 3
            agentID = 0
            expID = 'Deep_q_learning '

            # NOTE(review): `i` here is the leftover index of the
            # replay-memory loop above, not `i_episode`, so the recording
            # name and experiment id do not change per episode (and `i` is
            # undefined if replay_memory_init_size is 0). Possibly
            # `i_episode` was intended.
            my_mission_record = malmoutils.get_default_recording_object(
                agent_host, "save_%s-rep%d" % (expID, i))

            for retry in range(max_retries):
                try:
                    agent_host.startMission(my_mission, my_clients, my_mission_record,
                                            agentID, "%s-%d" % (expID, i))
                    break
                except RuntimeError as e:
                    if retry == max_retries - 1:
                        print("Error starting mission:", e)
                        exit(1)
                    else:
                        time.sleep(2.5)

            world_state = agent_host.getWorldState()
            print("Waiting for the mission to start", end=' ')
            while not world_state.has_mission_begun:
                print(".", end="")
                time.sleep(0.1)
                world_state = agent_host.getWorldState()
                for error in world_state.errors:
                    print("Error:", error.text)
            agent_host.sendCommand("look -1")
            agent_host.sendCommand("look -1")

        # Save the current checkpoint
        # NOTE(review): only the residue ", checkpoint_path)" of a statement
        # survives here in this copy; `saver` and `checkpoint_path` are not
        # used for saving anywhere else in this function, so a save call was
        # presumably lost. Confirm.

        while world_state.is_mission_running and all(
                e.text == '{}' for e in world_state.observations):
            world_state = agent_host.peekWorldState()
        # world_state = agent_host.getWorldState()
        state = gridProcess(world_state)  # MalmoGetWorldState?
        state = state_processor.process(sess, state)
        state = np.stack([state] * 4, axis=2)
        loss = None

        # One step in the environment
        for t in itertools.count():

            # Epsilon for this time step
            epsilon = epsilons[min(total_t, epsilon_decay_steps - 1)]

            # Add epsilon to Tensorboard
            episode_summary = tf.Summary()
            episode_summary.value.add(simple_value=epsilon, tag="epsilon")
            q_estimator.summary_writer.add_summary(episode_summary, total_t)

            # Maybe update the target estimator
            if total_t % update_target_estimator_every == 0:
                copy_model_parameters(sess, q_estimator, target_estimator)
                print("\nCopied model parameters to target network.")

            # Print out which step we're on, useful for debugging.
            print("\rStep {} ({}) @ Episode {}/{}, loss: {}".format(
                t, total_t, i_episode + 1, num_episodes, loss), end="")
            sys.stdout.flush()

            # Take a step
            action_probs = policy(sess, state, epsilon)
            action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
            # next_state, reward, done, _ = env.step(actionSet[action])
            # Malmo AgentHost send command?
            # print("Sending command: ", actionSet[action])
            agent_host.sendCommand(actionSet[action])

            # Spin until the video frame counter changes or the mission ends.
            world_state = agent_host.peekWorldState()
            num_frames_seen = world_state.number_of_video_frames_since_last_state

            while world_state.is_mission_running and world_state.number_of_video_frames_since_last_state == num_frames_seen:
                world_state = agent_host.peekWorldState()

            done = not world_state.is_mission_running
            print(" IS MISSION FINISHED? ", done)
            if world_state.is_mission_running:
                while world_state.number_of_rewards_since_last_state <= 0:
                    time.sleep(0.1)
                    world_state = agent_host.peekWorldState()
                reward = world_state.rewards[-1].getValue()
                print("Just received the reward: %s on action: %s " % (reward, actionSet[action]))

                while world_state.is_mission_running and all(
                        e.text == '{}' for e in world_state.observations):
                    world_state = agent_host.peekWorldState()
                # world_state = agent_host.getWorldState()

                if world_state.is_mission_running:
                    next_state = gridProcess(world_state)
                    next_state = state_processor.process(sess, next_state)
                    next_state = np.append(state[:, :, 1:],
                                           np.expand_dims(next_state, 2),
                                           axis=2)
                else:
                    print("Mission finished prematurely")
                    next_state = state

                done = not world_state.is_mission_running

                # If our replay memory is full, pop the first element
                if len(replay_memory) == replay_memory_size:
                    replay_memory.pop(0)

                # Save transition to replay memory
                replay_memory.append(
                    Transition(state, action, reward, next_state, done))

                # Update statistics
                stats.episode_rewards[i_episode] += reward
                stats.episode_lengths[i_episode] = t

                # Sample a minibatch from the replay memory
                samples = random.sample(replay_memory, batch_size)
                states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(
                    np.array, zip(*samples))

                # Calculate q values and targets (Double DQN):
                # q_estimator chooses the best next action, target_estimator
                # supplies its value; terminal transitions get reward only.
                q_values_next = q_estimator.predict(sess, next_states_batch)
                best_actions = np.argmax(q_values_next, axis=1)
                q_values_next_target = target_estimator.predict(
                    sess, next_states_batch)
                targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * \
                    discount_factor * q_values_next_target[np.arange(batch_size), best_actions]

                # Perform gradient descent update
                states_batch = np.array(states_batch)
                loss = q_estimator.update(sess, states_batch, action_batch,
                                          targets_batch)

                if done:
                    print("End of episode")
                    break

                state = next_state
                total_t += 1

            if done:
                # Reached only when the mission had already ended before a
                # reward could be awaited above (the running branch breaks
                # on its own when done).
                # while world_state.number_of_rewards_since_last_state <=0:
                #     time.sleep(0.1)
                #     print("Sleeping...zzzz")
                #     world_state = agent_host.peekWorldState()
                if len(world_state.rewards) > 0:
                    reward = world_state.rewards[-1].getValue()
                else:
                    print("IDK no reward")
                    reward = 0
                # reward = 0
                print("Just received the reward: %s on action: %s " % (reward, actionSet[action]))
                next_state = state

                # Here the transition is appended before the size check,
                # the reverse of the order used in the branch above.
                replay_memory.append(
                    Transition(state, action, reward, next_state, done))
                if len(replay_memory) == replay_memory_size:
                    replay_memory.pop(0)

                stats.episode_rewards[i_episode] += reward
                stats.episode_lengths[i_episode] = t

                samples = random.sample(replay_memory, batch_size)
                states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(
                    np.array, zip(*samples))

                # Calculate q values and targets (Double DQN)
                q_values_next = q_estimator.predict(sess, next_states_batch)
                best_actions = np.argmax(q_values_next, axis=1)
                q_values_next_target = target_estimator.predict(
                    sess, next_states_batch)
                targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * \
                    discount_factor * q_values_next_target[np.arange(batch_size), best_actions]

                # Perform gradient descent update
                states_batch = np.array(states_batch)
                loss = q_estimator.update(sess, states_batch, action_batch,
                                          targets_batch)
                print("End of Episode")
                break

            # state = next_state
            # total_t += 1

        # Add summaries to tensorboard
        print("Adding to tensorboard summaries !!!!")
        episode_summary = tf.Summary()
        episode_summary.value.add(
            simple_value=stats.episode_rewards[i_episode],
            node_name="episode_reward",
            tag="episode_reward")
        episode_summary.value.add(
            simple_value=stats.episode_lengths[i_episode],
            node_name="episode_length",
            tag="episode_length")
        q_estimator.summary_writer.add_summary(episode_summary, total_t)
        q_estimator.summary_writer.flush()

        # Hand the running totals back to the caller after every episode.
        yield total_t, plotting.EpisodeStats(
            episode_lengths=stats.episode_lengths[:i_episode + 1],
            episode_rewards=stats.episode_rewards[:i_episode + 1])
        # time.sleep(0.2)

    # env.monitor.close()
    return stats
missionXML += plug_in_dimensions(mob[1]) missionXML += '''</DrawingDecorator>''' return missionXML my_client_pool = MalmoPython.ClientPool() my_client_pool.add(MalmoPython.ClientInfo("", 10000)) if sys.version_info[0] == 2: sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0) # flush print output immediately else: import functools print = functools.partial(print, flush=True) my_mission = MalmoPython.MissionSpec( getMissionXML(endCondition, timeoutCondition), True) my_mission_record = MalmoPython.MissionRecordSpec() max_retries = 3 for retry in range(max_retries): try: agent_host.startMission(my_mission, my_client_pool, my_mission_record, 0, "blahblah") break except RuntimeError as e: if retry == max_retries - 1: print("Error starting mission", e) print("Is the game running?") exit(1) else: time.sleep(2)
print = functools.partial(print, flush=True) # Create default Malmo objects: agent_host = MalmoPython.AgentHost() try: agent_host.parse(sys.argv) except RuntimeError as e: print('ERROR:', e) print(agent_host.getUsage()) exit(1) if agent_host.receivedArgument("help"): print(agent_host.getUsage()) exit(0) my_mission = MalmoPython.MissionSpec() my_mission_record = MalmoPython.MissionRecordSpec() # Attempt to start a mission: max_retries = 3 for retry in range(max_retries): try: agent_host.startMission(my_mission, my_mission_record) break except RuntimeError as e: if retry == max_retries - 1: print("Error starting mission:", e) exit(1) else: time.sleep(2)
</AgentHandlers> </AgentSection> </Mission>''' # Variety of strategies for dealing with loss of motion: commandSequences = [ "jump 1; move 1; wait 1; jump 0; move 1; wait 2", # attempt to jump over obstacle "turn 0.5; wait 1; turn 0; move 1; wait 2", # turn right a little "turn -0.5; wait 1; turn 0; move 1; wait 2", # turn left a little "move 0; attack 1; wait 5; pitch 0.5; wait 1; pitch 0; attack 1; wait 5; pitch -0.5; wait 1; pitch 0; attack 0; move 1; wait 2", # attempt to destroy some obstacles "move 0; pitch 1; wait 2; pitch 0; use 1; jump 1; wait 6; use 0; jump 0; pitch -1; wait 1; pitch 0; wait 2; move 1; wait 2" # attempt to build tower under our feet ] my_mission = MalmoPython.MissionSpec(GetMissionXML(pitfall), True) my_mission_record = MalmoPython.MissionRecordSpec() if recordingsDirectory: my_mission_record.setDestination(recordingsDirectory + "//" + "Mission_1.tgz") my_mission_record.recordRewards() my_mission_record.recordObservations() my_mission_record.recordCommands() if agent_host.receivedArgument("record_video"): my_mission_record.recordMP4(24, 2000000) if agent_host.receivedArgument("test"): my_mission.timeLimitInSeconds(20) # else mission runs forever # Attempt to start the mission: max_retries = 3
def run_mission(self):
    """Load the mission in ``self.mission_file`` and run it repeatedly.

    Each repeat starts the mission (up to ``max_retries`` attempts), waits
    for it to begin, runs the agent, and calls ``self.agent.logOutput()``.

    NOTE(review): this copy of the file has lost its line structure and
    several call expressions (each marked inline). The method does not
    parse as it stands; nesting below is reconstructed from syntax and the
    missing expressions must be restored from the original file.
    """
    # Running the mission (taken from
    # NOTE(review): the source reference that followed "taken from" is
    # missing in this copy.
    # -- set up the mission -- #
    with open(self.mission_file, 'r') as f:
        print("Loading mission from %s" % self.mission_file)
        # NOTE(review): right-hand side missing; `f` is never read, so
        # presumably the file contents were read here.
        mission_xml =
        my_mission = MalmoPython.MissionSpec(mission_xml, True)
    # add 20% holes for interest
    # (disabled: the block below is an inert string literal)
    """for x in range(1,4): for z in range(1,13): if random.random()<0.1: my_mission.drawBlock( x,45,z,"lava")"""
    max_retries = 3
    checkpoint_iter = 100
    # NOTE(review): the condition's callee is missing; only the "test"
    # argument and closing parenthesis survive.
    if"test"):
        num_repeats = 1
    else:
        num_repeats = 150
    cumulative_rewards = []
    for i in range(num_repeats):
        print()
        print('Repeat %d of %d' % (i + 1, num_repeats))
        my_mission_record = MalmoPython.MissionRecordSpec()
        for retry in range(max_retries):
            # NOTE(review): the mission-start call is truncated; only its
            # last argument survives.
            try:, my_mission_record)
                break
            except RuntimeError as e:
                if retry == max_retries - 1:
                    print("Error starting mission:", e)
                    exit(1)
                else:
                    time.sleep(2.5)
        print("Waiting for the mission to start", end=' ')
        world_state =  # NOTE(review): right-hand side missing in this copy
        while not world_state.has_mission_begun:
            print(".", end="")
            time.sleep(0.1)
            world_state =  # NOTE(review): right-hand side missing in this copy
            for error in world_state.errors:
                print("Error:", error.text)
        print()
        # -- run the agent in the world -- #
        cumulative_reward =  # NOTE(review): right-hand side missing in this copy
        print('Cumulative reward: %d' % cumulative_reward)
        # The accumulation below is commented out, so `cumulative_rewards`
        # is still empty when it is printed at the end.
        # cumulative_rewards += [ cumulative_reward ]
        self.agent.logOutput()
        # On every `checkpoint_iter`-th repeat logOutput() is called a
        # second time.
        if i % checkpoint_iter == 0:
            self.agent.logOutput()
        # -- clean up -- #
        time.sleep(0.5)  # (let the Mod reset)
    print("Done.")
    print()
    print("Cumulative rewards for all %d runs:" % num_repeats)
    print(cumulative_rewards)
    return
def with_punch(agent_host, punch_cmd, my_mission, my_mission_record, plm):
    """Build a scene for one punch command, run it, and collect observations.

    The target (a block or entity) and the tool are parsed out of
    ``punch_cmd``; the target is placed inside a walled 11x11 arena, the
    agent inventory is initialised, and the agent attacks until the target
    is no longer observed. (Python 2 code: print statements, xrange.)

    NOTE(review): this copy of the file had lost its line structure, so
    block nesting and the boundary between comments and code were
    reconstructed from syntax. Spots that could not be decided are marked.

    Args:
        agent_host: Malmo agent host used to send commands and poll state.
        punch_cmd: indexable command; ``punch_cmd[1]`` is the action (its
            items [1], [3] and [-1] are read as 'name*...' strings) and
            ``punch_cmd[2]`` is a mapping describing the end state.
        my_mission: mission spec that is modified in place and may be
            replaced by a rebuilt one when the target is an entity.
        my_mission_record: passed through to ``startMission``.
        plm: object providing ``types_dict`` (with "EntityTypes" and
            "BlockType" keys) and a ``count`` attribute that is incremented.

    Returns:
        ``[punch_cmd, obs]`` where ``obs`` is the list of parsed
        observation dicts, or None when the target matches no known type.
    """
    # Step.1 build up the game
    # Start game and collect experience
    # notice('with tool punch block')
    # parse the input punch command
    # print "punch ... ing "
    # print punch_cmd
    # convert experience into new style envir/block & agent/item
    # this is very important
    action = punch_cmd[1]
    env_block = action[1]
    agt_item = action[3]

    # start point of action
    init_scene = OrderedDict()
    init_scene['env'] = []
    init_scene['agt'] = []
    init_scene['agt'].append(agt_item)
    init_scene['env'].append(env_block)

    # end point of action
    end_state = punch_cmd[2]
    end_scene = OrderedDict()
    end_scene['env'] = []
    end_scene['agt'] = []
    # print '########'
    # print end_state
    for item in end_state:
        end_scene['agt'].append(item + '*' + str(end_state[item]))
    #
    # test the new form
    # (new_scene and behavior are built but not used further down)
    new_scene = [init_scene, action, end_scene]
    # env_target =
    #
    # Names are the part before '*' in the 'name*...' strings.
    target = init_scene['env'][0].split('*')[0]
    tool = action[-1].split('*')[0]
    # print 'punch => ', target, 'with : ', item
    # An empty hand is treated as holding 'air'.
    if tool == 'hand':
        tool = 'air'
    behavior = ['punch', tool, target]
    # ===========
    # build the scene
    # with script
    # world_state = agent_host.getWorldState()
    # get from world_state
    # print '##########'
    # print world_state.observations[-1].text
    # planetbox = ['wheat']
    # environment object
    # if block not in planetbox:
    #     my_mission.drawBlock(5,5,5, block)
    # if block in planetbox:
    #     my_mission.drawBlock(5,4,5, block)

    # Step.2 build up the basic scene
    # init the environment scene !
    my_mission.forceWorldReset()  # force the world to reset
    my_mission.observeGrid(0, 0, 0, 2, 2, 2, 'grid')
    # my_mission.observeHotBar()
    # block or entity
    # print "PLM"
    # Classify the target case-insensitively and replace it with the
    # canonical spelling found in plm.types_dict.
    blockflag = None
    for item in plm.types_dict:
        if item == "EntityTypes":
            for word in plm.types_dict[item]:
                if target == word.lower():
                    blockflag = "EntityTypes"
                    target = word
        if item == "BlockType":
            for word in plm.types_dict[item]:
                if target == word.lower():
                    blockflag = "BlockType"
                    target = word
    if blockflag == None:
        print 'target is ', target, 'wrong types ,.,..'
        return None
        # raise ValueError;
    # NOTE(review): it is unclear from this copy whether the next print was
    # live code or part of the commented-out line above; confirm.
    print "air"
    print 'target == >', target
    # print blockflag
    # Block and Entity is totally different!
    # block = "Stone"
    # (disabled: the block below is an inert string literal)
    """ # if blockflag == "BlockType": if False: # if True: # Block dropbox = ['sand', 'gravel'] if block in dropbox: for i in range(5, 5+10): my_mission.drawBlock(5,i,5, str(block)) else: my_mission.drawBlock(5,5,5, str(block)) my_mission.drawBlock(4,5,5, 'stone') my_mission.drawBlock(6,5,5, 'stone') if block in dropbox: my_mission.drawBlock(5,4,5, 'stone') my_mission.drawBlock(5,4,6, 'stone') my_mission.startAtWithPitchAndYaw(5.5,4,4,0,0) # my_mission.endAt(5,4,6,1) strxml = my_mission.getAsXML(True) root = ET.fromstring(strxml) MalmoPython.MissionSpec(strxml,True) """
    # Entity or Block is fine
    # update the block into Entity?
    # if blockflag == "EntityTypes":
    # if blockflag == "BlockType":
    if True:
        # print block
        # build up the fence or ironblock using stone
        # pig x5,y5,z5
        # build fence to limit the move
        fence = 'sand'
        # Stone floor at y=4 plus four sand walls from y=4 to y=9.
        my_mission.drawCuboid(-0, 4, -0, 10, 4, 10, 'stone')
        my_mission.drawCuboid(-0, 4, -0, 10, 9, -0, 'sand')
        my_mission.drawCuboid(-0, 4, -0, -0, 9, 10, 'sand')
        my_mission.drawCuboid(-0, 4, 10, 10, 9, 10, 'sand')
        my_mission.drawCuboid(10, 4, -0, 10, 9, 10, 'sand')
        # (disabled: the block below is an inert string literal)
        """ my_mission.drawBlock(5,4,6,fence) my_mission.drawBlock(4,4,6,fence) my_mission.drawBlock(6,4,6,fence) my_mission.drawBlock(5,5,6,fence) my_mission.drawBlock(4,5,6,fence) my_mission.drawBlock(6,5,6,fence) # my_mission.drawBlock(5,4,6,fence) # around side my_mission.drawBlock(4,4,5,fence) my_mission.drawBlock(6,4,5,fence) my_mission.drawBlock(4,5,5,fence) my_mission.drawBlock(6,5,5,fence) my_mission.drawBlock(4,4,4,fence) my_mission.drawBlock(6,4,4,fence) my_mission.drawBlock(4,5,4,fence) my_mission.drawBlock(6,5,4,fence) my_mission.drawBlock(4,4,3,fence) my_mission.drawBlock(6,4,3,fence) my_mission.drawBlock(4,5,3,fence) my_mission.drawBlock(6,5,3,fence) # back of wall my_mission.drawBlock(5,4,2,fence) my_mission.drawBlock(4,4,2,fence) my_mission.drawBlock(6,4,2,fence) my_mission.drawBlock(5,5,2,'iron_bars') my_mission.drawBlock(4,5,2,'iron_bars') my_mission.drawBlock(6,5,2,'iron_bars') # build orak floor? my_mission.drawBlock(5,3,5,fence) my_mission.drawBlock(5,3,4,fence) my_mission.drawBlock(5,3,3,fence) """
        # if blockflag == "BlockType":
        # Drawn for entity targets as well, since the guard above is
        # commented out.
        my_mission.drawBlock(5, 4, 5, target)
        # print 'skip the block and directly test the entity ... '
        # return None
        if blockflag == "EntityTypes":
            # Entities are injected by editing the mission XML and
            # rebuilding the mission spec from it.
            strxml = my_mission.getAsXML(True)
            root = ET.fromstring(strxml)
            # load in the Entity
            t = {
                'pitch': str(0),
                'type': target,
                'x': "5.5",
                "xVel": "0",
                "yaw": "0",
                "y": "5",
                "yVel": "0",
                "z": "5",
                "zVel": "0"
            }
            init_item = []
            init_item.append(t)
            # NOTE(review): the '{}' prefix on the three tag names below is
            # empty in this copy; presumably an XML namespace URI belongs
            # inside the braces. Confirm against the original file.
            for child in root.iter(
                    '{}ServerHandlers'):
                edd = Element(
                    '{}DrawingDecorator')
                edd.append(
                    Element('{}DrawEntity', t))
                child.append(edd)
            xmlstr = ET.tostring(root, encoding='utf8', method='xml')
            my_mission = MalmoPython.MissionSpec(xmlstr, True)

    print '++++++++++++++++++++++'
    print '++++++++++++++++++++++'
    print '++++++++++++++++++++++'
    print '++++++++++++++++++++++'
    print 'Now running ... ', punch_cmd

    # Step.3 init the agent inventory
    # init the agent inventory !
    # print "####################"
    # print init_scene['agt']
    my_mission = init_agent(my_mission, init_scene['agt'])
    # my_mission = state_to_agent(rewarder, my_mission, state)
    print '#####################'
    print '#####################'
    print '#####################'
    # print my_mission.getAsXML(True)
    # (states / event_s / action_ are assigned but not read afterwards)
    states = []
    event_s = None
    action = None
    # my_mission.forceWorldReset()
    states = []
    event_s = None
    action_ = None

    # Step.4 run the scene(script) and collect experience
    #
    # ========
    # start the Mission
    # with the scene
    # =====
    #
    # agent_host, my_mission, my_mission_record = setup_env(params)
    startMission(agent_host, my_mission, my_mission_record)
    world_state = agent_host.getWorldState()
    obs = []
    # the fence already limit the mob , kill it and then move forward ...
    # tool
    # make sure the time stamp record the sense states
    time_stamp = 0
    init_detect = False
    while world_state.is_mission_running:
        world_state = agent_host.peekWorldState()
        # One-time capture of the first observation after mission start.
        if (world_state.has_mission_begun) and (init_detect is False):
            agent_host.sendCommand('move 0')
            action = []
            if len(world_state.observations) > 0:
                world_state = agent_host.peekWorldState()
                obs_text = json.loads(world_state.observations[-1].text)
                print 'Begin test ... '
                print obs_text
                obs_text['action'] = action
                init_detect = True
                obs.append(obs_text)
        if len(world_state.observations) > 0:
            action = []
            toolstr = 'InventorySlot_0_item'
            toolflag = False
            # time stamp 1 record the init state
            world_state = agent_host.peekWorldState()
            obs_text = json.loads(world_state.observations[-1].text)
            obs_text['action'] = action
            # time_stamp += 1
            # obs_text['time_stamp'] = time_stamp
            # print 'Here!'
            # print obs_text
            obs.append(obs_text)
            #
            # print obs_text
            # tool check code
            # time stamp 2
            # action point
            if obs_text[toolstr] == tool:
                # the right tool is in hand
                toolflag = True
                pass
            else:
                # Swap the wanted item (or an empty 'air' slot) into slot 0.
                # toolflag stays False for this pass, so the attack only
                # happens on a later loop iteration.
                if tool != 'air':
                    # consider replace hand with air to unique the code
                    for i in xrange(0, 39):
                        key = 'InventorySlot_' + str(i) + '_item'
                        if obs_text[key] == tool:
                            agent_host.sendCommand('swapInventoryItems 0 ' +
                                                   str(i))
                            time.sleep(1)
                            player = obs_text["Name"]
                            action.append(player + " swap " + tool +
                                          " to hand")
                    pass
                else:
                    for i in xrange(0, 39):
                        key = 'InventorySlot_' + str(i) + '_item'
                        if obs_text[key] == 'air':
                            agent_host.sendCommand('swapInventoryItems 0 ' +
                                                   str(i))
                            time.sleep(1)
                            player = obs_text["Name"]
                            action.append(player + " swap " + tool +
                                          " to hand")
                    pass
            world_state = agent_host.peekWorldState()
            obs_text = json.loads(world_state.observations[-1].text)
            obs_text['action'] = action
            # time_stamp += 1
            # obs_text['time_stamp'] = time_stamp
            obs.append(obs_text)
            # update video and world state
            # print 'obs text ... ==== ???'
            # print obs_text
            # print len(world_state.observations)
            # update video and world state again
            attackflag = False
            if toolflag:
                # agent attack
                agent_host.sendCommand('attack 1')
                player = obs_text["Name"]
                action.append(player + " attacks " + target + " with " + tool)
                obs_text['action'] = action
                attackflag = True
                time.sleep(1)
            world_state = agent_host.peekWorldState()
            obs_text = json.loads(world_state.observations[-1].text)
            obs_text['action'] = action
            # time_stamp += 1
            # obs_text['time_stamp'] = time_stamp
            obs.append(obs_text)

            # check for target block or entity to monitor the mission end
            breakflag = False
            if blockflag == "EntityTypes":
                # detect nearest entity
                # print obs_text['near_entities']
                entflag = False
                for ent in obs_text['near_entities']:
                    if ent['name'] == target:
                        entflag = True
                # Target entity no longer observed: step forward and stop.
                if not entflag:
                    agent_host.sendCommand(
                        'move 0.5')  # move forward to collect drops
                    player = obs_text["Name"]
                    action.append(player + " moves forward")
                    obs_text['action'] = action
                    time.sleep(1)
                    breakflag = True
                pass
            if blockflag == "BlockType":
                # print '#####'
                # print obs_text
                entflag = False
                # Index 6 of the observed 'grid' is compared against the
                # target block name.
                if obs_text['grid'][6] == target:
                    entflag = True
                if not entflag:
                    agent_host.sendCommand('move 0.5')
                    player = obs_text["Name"]
                    action.append(player + " moves forward")
                    obs_text['action'] = action
                    time.sleep(1)
                    breakflag = True
                pass
            # update action for each time
            world_state = agent_host.peekWorldState()
            obs_text = json.loads(world_state.observations[-1].text)
            obs_text['action'] = action
            # time_stamp += 1
            # obs_text['time_stamp'] = time_stamp
            obs.append(obs_text)
            if breakflag:
                # collect all breakflag here !
                break

    # detect and collect the state
    world_state = agent_host.peekWorldState()
    obs_text = json.loads(world_state.observations[-1].text)
    # print 'obs text ... ==== ???'
    # print obs_text
    # print len(world_state.observations)
    agent_host.sendCommand('quit')
    player = obs_text["Name"]
    action = []
    action.append(player + " quit the scene")
    obs_text['action'] = action
    # time_stamp += 1
    # obs_text['time_stamp'] = time_stamp
    obs.append(obs_text)

    print '++++++++++++++++'
    print '+++++OBS++++++++'
    print '++++++++++++++++'
    print '', len(obs)
    # NOTE(review): the loop variable `e` is unused and the whole `obs`
    # list is printed; possibly `print e` was intended. The nesting of the
    # second print is also reconstructed. Confirm.
    for e in obs:
        print '#####'
        print obs
    plm.count += 1
    print '---- ', str(plm.count), 'th scene running ---- '
    print '###############3'
    print '###############3'
    # event_e = None
    # next_state = states[-1]
    # if overall_state(rewarder, state) == overall_state(rewarder, next_state):
    #     flag = False
    # else:
    #     flag = True
    # (state / next_state / flag / missionflag are assigned but not returned)
    state = None
    next_state = None
    flag = None
    # return missionflag, obs
    missionflag = True
    package = [punch_cmd, obs]
    return package
client_pool.add(MalmoPython.ClientInfo('', x)) # Keep score of how our robots are doing: survival_scores = [0 for x in range(NUM_AGENTS) ] # Lasted to the end of the mission without dying. apple_scores = [0 for x in range(NUM_AGENTS)] # Collecting apples is good. zombie_kill_scores = [0 for x in range(NUM_AGENTS) ] # Good! Help rescue humanity from zombie-kind. player_kill_scores = [0 for x in range(NUM_AGENTS) ] # Bad! Don't kill the other players! num_missions = 5 if INTEGRATION_TEST_MODE else 30000 for mission_no in xrange(1, num_missions + 1): print "Running mission #" + str(mission_no) # Create mission xml - use forcereset if this is the first mission. my_mission = MalmoPython.MissionSpec( getXML("true" if mission_no == 1 else "false"), True) # Generate an experiment ID for this mission. # This is used to make sure the right clients join the right servers - # if the experiment IDs don't match, the startMission request will be rejected. # In practice, if the client pool is only being used by one researcher, there # should be little danger of clients joining the wrong experiments, so a static # ID would probably suffice, though changing the ID on each mission also catches # potential problems with clients and servers getting out of step. # Note that, in this sample, the same process is responsible for all calls to startMission, # so passing the experiment ID like this is a simple matter. If the agentHosts are distributed # across different threads, processes, or machines, a different approach will be required. # (Eg generate the IDs procedurally, in a way that is guaranteed to produce the same results # for each agentHost independently.) experimentID = str(uuid.uuid4())
print agent_host.getUsage() exit(1) if agent_host.receivedArgument("help"): print agent_host.getUsage() exit(0) itemdrawingxml = GetItemDrawingXML() if agent_host.receivedArgument("test"): num_reps = 1 else: num_reps = 30000 for iRepeat in range(num_reps): my_mission = MalmoPython.MissionSpec( GetMissionXML("Nom nom nom run #" + str(iRepeat), itemdrawingxml), validate) # Set up a recording - MUST be done once for each mission - don't do this outside the loop! my_mission_record = MalmoPython.MissionRecordSpec(recordingsDirectory + "//" + "Mission_" + str(iRepeat) + ".tgz") my_mission_record.recordRewards() my_mission_record.recordMP4(24, 400000) max_retries = 3 for retry in range(max_retries): try: # Attempt to start the mission: agent_host.startMission(my_mission, my_client_pool, my_mission_record, 0, "itemTestExperiment") break except RuntimeError as e:
print 'ERROR:', e print agent_host.getUsage() exit(1) if agent_host.receivedArgument("help"): print agent_host.getUsage() exit(0) if agent_host.receivedArgument("test"): num_repeats = 1 else: num_repeats = 2 #REMMBER THIS IS THE LEVELS for i in range(num_repeats): size = int(6 + 0.5 * i) print "Size of maze:", size my_mission = MalmoPython.MissionSpec( GetMissionXML("0", 0.4 + float(i / 20.0), size), True) my_mission_record = MalmoPython.MissionRecordSpec() my_mission.requestVideo(800, 500) my_mission.setViewpoint(1) # Attempt to start a mission: max_retries = 3 my_clients = MalmoPython.ClientPool() my_clients.add(MalmoPython.ClientInfo( '', 10000)) # add Minecraft machines here as available for retry in range(max_retries): try: agent_host.startMission(my_mission, my_clients, my_mission_record, 0, "%s-%d" % ('Moshe', i)) break except RuntimeError as e:
try: os.makedirs(recordingsDirectory) except OSError as exception: if exception.errno != errno.EEXIST: # ignore error if already existed raise # Set up a recording my_mission_record = MalmoPython.MissionRecordSpec() my_mission_record.recordRewards() my_mission_record.recordObservations() for iRepeat in xrange(num_reps): my_mission_record.setDestination(recordingsDirectory + "//" + "Mission_" + str(iRepeat) + ".tgz") mazeblock = random.choice(mazeblocks) my_mission = MalmoPython.MissionSpec(GetMissionXML(mazeblock), validate) max_retries = 3 for retry in range(max_retries): try: agent_host.startMission(my_mission, my_mission_record) break except RuntimeError as e: if retry == max_retries - 1: print "Error starting mission:", e exit(1) else: time.sleep(2) print "Waiting for the mission to start", world_state = agent_host.getWorldState()
print agent_host.getUsage() exit(0) if agent_host.receivedArgument("test"): num_reps = 1 else: num_reps = 20 current_yaw = 0 best_yaw = 0 current_life = 0 cumulative_rewards = [] for iRepeat in range(num_reps): mission_xml = getMissionXML(MOB_TYPE + " Apocalypse #" + str(iRepeat)) my_mission = MalmoPython.MissionSpec(mission_xml,validate) max_retries = 3 for retry in range(max_retries): try: # Set up a recording my_mission_record = MalmoPython.MissionRecordSpec(recordingsDirectory + "//" + "Mission_" + str(iRepeat) + ".tgz") my_mission_record.recordRewards() my_mission_record.recordCommands() # Attempt to start the mission: agent_host.startMission( my_mission, my_client_pool, my_mission_record, 0, "predatorExperiment" ) break except RuntimeError as e: if retry == max_retries - 1: print "Error starting mission",e
</Mission>''' # Variety of strategies for dealing with loss of motion: commandSequences = [ "jump 1; move 1; wait 1; jump 0; move 1; wait 2", # attempt to jump over obstacle "turn 0.5; wait 1; turn 0; move 1; wait 2", # turn right a little "turn -0.5; wait 1; turn 0; move 1; wait 2", # turn left a little "move 0; attack 1; wait 5; pitch 0.5; wait 1; pitch 0; attack 1; wait 5; pitch -0.5; wait 1; pitch 0; attack 0; move 1; wait 2", # attempt to destroy some obstacles "move 0; pitch 1; wait 2; pitch 0; use 1; jump 1; wait 6; use 0; jump 0; pitch -1; wait 1; pitch 0; wait 2; move 1; wait 2" # attempt to build tower under our feet ] sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0) # flush print output immediately my_mission = MalmoPython.MissionSpec(GetMissionXML(), True) agent_host = MalmoPython.AgentHost() try: agent_host.parse(sys.argv) except RuntimeError as e: print 'ERROR:', e print agent_host.getUsage() exit(1) if agent_host.receivedArgument("help"): print agent_host.getUsage() exit(0) if agent_host.receivedArgument("test"): my_mission.timeLimitInSeconds(20) # else mission runs forever
def reset(self):
    """Build a fresh Malmo mission for a new episode and start it.

    Regenerates the mission XML, draws a random pick-up landmark, a random
    destination landmark and a random agent start landmark, places the agent
    and the item in the mission, then starts the mission on the module-level
    ``malmo_env`` agent host and blocks until the world state reports that
    the mission has begun.

    The destination and the agent start are drawn independently from the
    landmarks that are not the pick-up source, so they may coincide.

    Side effects: rebinds ``self.mission_xml``, ``self.mission_record``,
    ``self.mission``, ``self.current_agent_location``, ``self.item_location``
    and ``self.destination``. Calls ``exit(1)`` when ``startMission`` still
    raises ``RuntimeError`` on the final retry.
    """
    log = logging.getLogger('SimpleMalmoEnvironment.reset')

    del self.mission  # just to be sure, a new mission is created every episode

    # mission related objects
    self.mission_xml = self.generate_malmo_environment_xml()
    log.debug("Obtained mission XML: \n %s", self.mission_xml)
    self.mission_record = MalmoPython.MissionRecordSpec()
    self.mission = MalmoPython.MissionSpec(self.mission_xml, True)
    log.info("Loaded mission XML")

    self.mission.setViewpoint(1)

    # set mission variables - landmarks, source and destination
    landmarks = copy.deepcopy(self.landmarks)
    # first select the source to pick up from
    source_loc = random.choice(landmarks)
    # tentative destinations (and start cells) are the other landmarks
    remaining_landmarks = [lm for lm in landmarks if lm != source_loc]
    destination = random.choice(remaining_landmarks)
    # start location for the agent; guaranteed != pick-up source
    agent_start_loc = random.choice(remaining_landmarks)

    x, y = agent_start_loc[0], agent_start_loc[1]
    self.current_agent_location = [x, y]
    # malmo needs locations offset by 0.5 to be in the middle of the square,
    # else the agent is placed at the edge
    self.mission.startAt(x + 0.5, 46, y + 0.5)

    self.item_location = landmarks.index(source_loc)
    self.destination = landmarks.index(destination)
    # the item type drawn at the source is looked up by destination index
    self.mission.drawItem(source_loc[0], 47, source_loc[1],
                          self.landmark_types[self.destination])

    retries = 3
    log.debug("Final Mission XML sent to Malmo: \n %s",
              self.mission.getAsXML(True))
    for retry in range(retries):
        try:
            malmo_env.startMission(self.mission, self.mission_record)
            time.sleep(10)
            world_state = malmo_env.getWorldState()
            if world_state.has_mission_begun:
                break
        except RuntimeError as e:
            if retry == retries - 1:
                # format the exception itself: ``e.message`` does not exist
                # on Python 3 exceptions
                log.error(
                    "Error starting mission. Max retries elapsed. Closing! %s",
                    e)
                exit(1)
            else:
                time.sleep(10)

    # Poll until the mission is reported as begun, logging any errors seen.
    world_state = malmo_env.getWorldState()
    while not world_state.has_mission_begun:
        log.debug("Waiting for mission to begin")
        time.sleep(0.1)
        world_state = malmo_env.getWorldState()
        for error in world_state.errors:
            log.error("Error: %s", error.text)
def __init__(self, xmlfile):
    """Create the Malmo objects for ``xmlfile`` and initialise agent state.

    Builds the agent host, the mission spec (from ``getMissionXML(xmlfile)``)
    and an empty mission record, defines the block-name/index lookup tables,
    registers a grid observation named ``'relative_view'`` covering
    ``self.sight``, and resets every bookkeeping attribute.
    """
    # --- Malmo handles -----------------------------------------------------
    self.agent_host = MalmoPython.AgentHost()
    mission_xml = getMissionXML(xmlfile)
    self.my_mission = MalmoPython.MissionSpec(mission_xml, True)
    self.my_mission_record = MalmoPython.MissionRecordSpec()

    # --- Object vocabulary -------------------------------------------------
    self.objects_of_interest = ['stone_button', 'wooden_door', 'lever']

    # 4 represents anything in the env that is walkable (excluding wool)
    self.object_to_index = dict([
        ('air', 9),
        ('player', 8),
        ('wooden_door', 2),
        ('wool', 3),
        ('stained_hardened_clay', 4),
        ('clay', 4),
        ('iron_block', 4),
        ('quartz_block', 4),
        ('fire', 5),
        ('lever', 6),
        ('stone_button', 7),
        ('gravel', 10),
        ('redstone_wire', 4),
    ])
    self.index_to_object = dict([
        (255, 'unknown'),
        (9, 'frontier'),
        (8, 'player'),
        (2, 'wooden_door'),
        (3, 'wool'),
        (4, 'wall'),
        (5, 'fire'),
        (6, 'lever'),
        (7, 'stone_button'),
        (10, 'gravel'),
    ])
    # state of the door to be recorded
    self.non_opaque_objects = [9, 8, 1, 2, 5, 6, 7]
    # 'lever' and 'gravel' are deliberately not listed as plainly passable
    self.passable_objects = ['air', 'wooden_door']
    self.passable_objects_with_cost = dict([
        ('air', 1),
        ('lever', 1),
        ('wooden_door', 2),
        ('gravel', 5),
    ])
    self.floor_objects_types = [
        'redstone_wire',
        'wool',
        'iron_block',
        'quartz_block',
    ]
    self.envsize = 50

    # --- Env specific variables (modify them wrt xmlfile) ------------------
    # A smaller alternative window would be (-3, 3) on x and z.
    self.sight = {'x': (-21, 21), 'z': (-21, 21), 'y': (-1, 1)}
    self.angle = 50

    x_low, x_high = self.sight['x']
    y_low, y_high = self.sight['y']
    z_low, z_high = self.sight['z']
    # Number of cells along each axis, both end points included.
    self.range_x = abs(x_high - x_low) + 1
    self.range_y = abs(y_high - y_low) + 1
    self.range_z = abs(z_high - z_low) + 1
    self.my_mission.observeGrid(x_low, y_low, z_low,
                                x_high, y_high, z_high,
                                'relative_view')
    self.scanning_range = 15

    # --- Goal specific variables -------------------------------------------
    self.num_victims_seen = 0
    self.num_doors_seen = 0
    self.total_victims = 3
    self.total_doors = 3
    self.victims_visited = np.zeros((self.envsize, self.envsize))
    self.victims_visited_sparse = set()

    # --- Position bookkeeping ----------------------------------------------
    half_x = self.range_x // 2
    half_y = self.range_y // 2
    half_z = self.range_z // 2
    self.current_position = (half_z, half_x)
    self.relative_position = {'y': half_y, 'z': half_z, 'x': half_x}
    self.absolute_position = None

    # NOTE that we start from 0 value of x and half value for z for
    # recording into the array.
    # The fields below are populated by the `observe()` function.
    self.grid = None
    self.ypos = None
    self.zpos = None
    self.xpos = None
    self.yaw = None
    self.pitch = None
    self.lineOfSight = None
    self.masked_grid = None
    self.relative_map = None

    map_shape = (self.range_y, self.envsize, self.envsize)
    self.absolute_map = np.zeros(map_shape)
    self.origin_coord = {'y': 27.0, 'z': 142.5, 'x': -2190.5}
    self.maze_map_dict = {}
def state_to_agent(rewarder, my_mission, state):
    """Return a new mission whose agent inventory mirrors ``state``.

    Args:
        rewarder: object exposing ``index_obj``, indexed by position in
            ``state`` to obtain the item name for that position.
        my_mission: mission object providing ``getAsXML(True)``.
        state: iterable of per-item counts; zero entries are ignored.

    Returns:
        A ``MalmoPython.MissionSpec`` built from the mission XML with an
        inventory element appended to every matched agent-start element.

    Raises:
        Exception: ``'init_slot Error'`` once the slot counter drops
            below 9 (slots are handed out from 35 downwards).
    """
    root = ET.fromstring(my_mission.getAsXML(True))

    # Item name -> count, in state order, skipping empty entries.
    counts = OrderedDict()
    for index, amount in enumerate(state):
        if amount != 0:
            counts[rewarder.index_obj[index]] = int(amount)

    init_item = []
    init_slot = 35
    for item_name, total in counts.items():
        if item_name == 'hand':
            # the 'hand' pseudo-item takes no inventory slot
            continue
        # Split the count into full stacks of 64 plus one partial stack.
        # A zero remainder emits no extra stack, so exact multiples of 64
        # no longer produce a quantity-"0" entry.
        full_stacks, remainder = divmod(total, 64)
        quantities = ["64"] * full_stacks
        if remainder:
            quantities.append(str(remainder))
        for quantity in quantities:
            init_item.append({
                'slot': str(init_slot),
                'type': item_name,
                'quantity': quantity
            })
            init_slot = init_slot - 1
            if init_slot < 9:
                raise Exception('init_slot Error')

    # add it into agent mission configuration
    # NOTE(review): the tags below carry an empty "{}" namespace prefix;
    # confirm it matches the namespace used by the mission XML.
    for child in root.iter('{}AgentStart'):
        child.append(Element('{}Inventory'))
        for c in child:
            if "Inventory" in c.tag:
                # pass 9-35
                for i_ in init_item:
                    c.append(Element('{}InventoryItem', i_))

    xmlstr = ET.tostring(root, encoding='utf8', method='xml')
    my_mission = MalmoPython.MissionSpec(xmlstr, True)
    return my_mission
<Grid name="column"> <min x="0" y="-256" z="0" /> <max x="0" y="256" z="0" /> </Grid> </ObservationFromGrid> <VideoProducer want_depth="true"> <Width>''' + str(video_width) + '''</Width> <Height>''' + str(video_height) + '''</Height> </VideoProducer> <AbsoluteMovementCommands /> </AgentHandlers> </AgentSection> </Mission>''' validate = True my_mission = MalmoPython.MissionSpec(missionXML, validate) agent_host = MalmoPython.AgentHost() try: agent_host.parse(sys.argv) except RuntimeError as e: print 'ERROR:', e print agent_host.getUsage() exit(1) if agent_host.receivedArgument("help"): print agent_host.getUsage() exit(0) agent_host.setObservationsPolicy( MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY) agent_host.setVideoPolicy(MalmoPython.VideoPolicy.LATEST_FRAME_ONLY)
def init_agent(my_mission, agent_init_inv):
    """Return a new mission whose agent starts with the given inventory.

    Args:
        my_mission: mission object providing ``getAsXML(True)``.
        agent_init_inv: iterable of ``"<item_name>*<count>"`` strings.
            A later entry for the same item name replaces an earlier one.

    Returns:
        A ``MalmoPython.MissionSpec`` rebuilt from the mission XML with an
        inventory element appended to every matched agent-start element.

    Raises:
        Exception: ``'init_slot Error'`` once the slot counter drops
            below 9 (slots are handed out from 35 downwards).
        ValueError / IndexError: if an entry is not ``name*integer``.
    """
    # to string and rebuild it from string of XML
    root = ET.fromstring(my_mission.getAsXML(True))

    # Item name -> count, in the order the items were listed.
    counts = OrderedDict()
    for entry in agent_init_inv:
        pair = entry.split('*')
        counts[pair[0]] = int(pair[1])

    init_item = []
    init_slot = 35
    for item_name, total in counts.items():
        if item_name == 'hand':
            # the 'hand' pseudo-item takes no inventory slot
            continue
        # Split the count into full stacks of 64 plus one partial stack.
        # A zero remainder emits no extra stack, so a count of exactly 64
        # becomes one full stack instead of a quantity-"0" entry.
        full_stacks, remainder = divmod(total, 64)
        quantities = ["64"] * full_stacks
        if remainder:
            quantities.append(str(remainder))
        for quantity in quantities:
            init_item.append({
                'slot': str(init_slot),
                'type': item_name,
                'quantity': quantity
            })
            init_slot = init_slot - 1
            if init_slot < 9:
                raise Exception('init_slot Error')

    # add it into agent mission configuration
    # NOTE(review): the tags below carry an empty "{}" namespace prefix;
    # confirm it matches the namespace used by the mission XML.
    for child in root.iter('{}AgentStart'):
        child.append(Element('{}Inventory'))
        for c in child:
            if "Inventory" in c.tag:
                # pass 9-35
                for i_ in init_item:
                    c.append(Element('{}InventoryItem', i_))

    xmlstr = ET.tostring(root, encoding='utf8', method='xml')
    my_mission = MalmoPython.MissionSpec(xmlstr, True)
    return my_mission
agent_host.parse(sys.argv) except RuntimeError as e: print 'ERROR:', e print agent_host.getUsage() exit(1) if agent_host.receivedArgument("help"): print agent_host.getUsage() exit(0) if "gs" in sys.argv: search_alg = 'gs' else: search_alg = 'bfs' my_mission = MalmoPython.MissionSpec(GetMissionXML("random", 0.2), True) my_mission_record = MalmoPython.MissionRecordSpec() # Attempt to start a mission: max_retries = 3 for retry in range(max_retries): try: agent_host.startMission(my_mission, my_mission_record) break except RuntimeError as e: if retry == max_retries - 1: print "Error starting mission:", e exit(1) else: time.sleep(2)
def excute_scene(exp, plm, agent_config): # total action from agent (id , action , experience ) // { craft attack use } # scene creation agent_host = MalmoPython.AgentHost() # default scene path scene_path = agent_config rewarder = None state = None item = 'iron_axe' block = 'log' target_path = "../data/scene_experience/" agent_name = agent_config.split("/")[-1].split(".")[0] print "Agent_name : ", agent_name # common mission setting my_mission_record = MalmoPython.MissionRecordSpec() print "Now read configuration from ", scene_path my_mission = MalmoPython.MissionSpec(open(scene_path).read(), True) list_exp = json.loads(exp) print list_exp if "punch" in list_exp[1]: # punch is the most complicated action in environment """ # skip file_name = ('_').join(list_exp[1]) files_dir = os.listdir(target_path) for fn in files_dir: fns = fn.split('#') if fns[0] == file_name: return 0 """ scene_record = with_punch(agent_host, list_exp, my_mission, my_mission_record, plm) # print "package ... " # file_name = ('_').join(list_exp[1]) file_name = agent_name + "#" + file_name files_dir = os.listdir(target_path) count = 0 for fn in files_dir: fns = fn.split('#') if "#".join(fns[:-1]) == file_name: count += 1 print '#############%%%%%%%%%%%%%%%%%%%%' print target_path + file_name + "#" + str(count) fp = open(target_path + file_name + '#' + str(count), 'w') # search for file name json_txt = json.dumps(scene_record) fp.write(json_txt) fp.write('\n') fp.close() pass if "craft" in list_exp[1]: """ file_name = ('_').join(list_exp[1]) files_dir = os.listdir(target_path) for fn in files_dir: fns = fn.split('#') if fns[0] == file_name: return 0 """ scene_record = with_craft(agent_host, list_exp, my_mission, my_mission_record, plm) print 'package ... 
' # skip file_name = ('_').join(list_exp[1]) file_name = agent_name + '#' + file_name # Adam#craft_activator_rail_with_iron_ingot*6_stick*2_redstone_torch*1 files_dir = os.listdir(target_path) count = 0 for fn in files_dir: fns = fn.split('#') if "#".join(fns[:-1]) == file_name: count += 1 print '#############%%%%%%%%%%%%%%%%%%%%' print target_path + file_name + "#" + str(count) fp = open(target_path + file_name + '#' + str(count), 'w') # search for file name json_txt = json.dumps(scene_record) fp.write(json_txt) fp.write('\n') fp.close() pass """ if "use" in list_exp[1]: scene_record = with_use(agent_host, list_exp, my_mission, my_mission_record, plm) pass """ # record the scene_record into file pass
def setMissionXML(self, missionXML):
    """Remember ``missionXML`` and rebuild the Malmo mission objects from it.

    Stores the description object as ``self.missionDesc``, creates
    ``self.mission`` from the text returned by ``missionXML.xml()`` and
    replaces ``self.mission_record`` with a new, empty record spec.
    """
    self.missionDesc = missionXML
    xml_text = missionXML.xml()
    mission_spec = MalmoPython.MissionSpec(xml_text, True)
    self.mission = mission_spec
    self.mission_record = MalmoPython.MissionRecordSpec()
agent_host.parse(sys.argv) except RuntimeError as e: print 'ERROR:', e print agent_host.getUsage() exit(1) if agent_host.receivedArgument("help"): print agent_host.getUsage() exit(0) if agent_host.receivedArgument("test"): num_repeats = 1 else: num_repeats = 10 for i in range(num_repeats): my_mission = MalmoPython.MissionSpec( GetMissionXML("random", float(i / 10.0)), True) my_mission_record = MalmoPython.MissionRecordSpec() # Attempt to start a mission: max_retries = 3 for retry in range(max_retries): try: agent_host.startMission(my_mission, my_mission_record) break except RuntimeError as e: if retry == max_retries - 1: print "Error starting mission:", e exit(1) else: time.sleep(2)
if __name__ == '__main__': agent_host = MalmoPython.AgentHost() try: agent_host.parse(sys.argv) except RuntimeError as e: print('ERROR:', e) print(agent_host.getUsage()) exit(1) if agent_host.receivedArgument("help"): print(agent_host.getUsage()) exit(0) n = 1 num_repeats = 200 agent = Agent(iterations=num_repeats) my_mission = MalmoPython.MissionSpec(missionXML, True) my_recording_mission = MalmoPython.MissionSpec(recordingXML, True) # Attempt to start a mission: max_retries = 3 cumulative_rewards = [] for i in range(num_repeats): for retry in range(max_retries): try: if RECORDING and (i % RECORDING_ITERATIONS == 0): my_mission_record = MalmoPython.MissionRecordSpec( "recording_" + str(i) + ".tgz") my_mission_record.recordMP4(60, 8000000) agent.recording = True
agent_host.parse(sys.argv) except RuntimeError as e: print 'ERROR:', e print agent_host.getUsage() exit(1) if agent_host.receivedArgument("help"): print agent_host.getUsage() exit(0) if agent_host.receivedArgument("test"): num_reps = 1 else: num_reps = 30000 for iRepeat in range(num_reps): my_mission = MalmoPython.MissionSpec( GetMissionXML("Crafty #" + str(iRepeat)), validate) my_mission_record = MalmoPython.MissionRecordSpec( ) # Records nothing by default max_retries = 3 for retry in range(max_retries): try: # Attempt to start the mission: agent_host.startMission(my_mission, my_client_pool, my_mission_record, 0, "craftTestExperiment") break except RuntimeError as e: if retry == max_retries - 1: print "Error starting mission", e print "Is the game running?" exit(1)
exit(0) if agent_host.receivedArgument("test"): exit( 0 ) # TODO: find a way to usefully run this sample as an integration test input_file_name = agent_host.getStringArgument("file") if input_file_name == "": print('\nERROR: Supply a file to load on the command line.\n') print(agent_host.getUsage()) exit(1) validate = True mission_file = open(agent_host.getStringArgument("file"), 'r') my_mission = MalmoPython.MissionSpec(, validate) for iRepeat in range(30000): my_mission_record = MalmoPython.MissionRecordSpec() max_retries = 3 for retry in range(max_retries): try: agent_host.startMission(my_mission, my_mission_record) break except RuntimeError as e: if retry == max_retries - 1: print("Error starting mission:", e) exit(1) else: time.sleep(2) # Create default Malmo objects: agent_host = MalmoPython.AgentHost() try: agent_host.parse(sys.argv) except RuntimeError as e: print('ERROR:', e) print(agent_host.getUsage()) exit(1) if agent_host.receivedArgument("help"): print(agent_host.getUsage()) exit(0) my_mission = MalmoPython.MissionSpec(missionXML, True) my_mission_record = MalmoPython.MissionRecordSpec() # my_mission_record.recordMP4(20, 800000) # Attempt to start a mission: max_retries = 3 for retry in range(max_retries): try: agent_host.startMission(my_mission, my_client_pool, my_mission_record, 0, "") break except RuntimeError as e: if retry == max_retries - 1: print("Error starting mission:", e) exit(1) else:
#!/usr/bin/env python import MalmoPython import time # set up out malmo client malmo = MalmoPython.AgentHost() spec = open("classroom_basic.xml").read() spec = spec.replace("__WIDTH__", "640") spec = spec.replace("__HEIGHT__", "480") spec = spec.replace("__EPISODE_TIME_MS__", "10000000") mission = MalmoPython.MissionSpec(spec, True) mission_record = MalmoPython.MissionRecordSpec() malmo.startMission(mission, mission_record) while True: world_state = malmo.getWorldState() if len(world_state.rewards) > 0: print "len?", len(world_state.rewards) print world_state.rewards[0].getValue() time.sleep(1)
) print( "NB4tf4i vörös pipacsai (Vörös Pipacs Pokol) - DEAC-Hackers Battle Royale Arena\n\n" ) print( "The aim of this first challenge, called nb4tf4i's red flowers, is to collect as many red flowers as possible before the lava flows down the hillside.\n" ) print( "Ennek az első, az nb4tf4i vörös virágai nevű kihívásnak a célja összegyűjteni annyi piros virágot, amennyit csak lehet, mielőtt a láva lefolyik a hegyoldalon.\n" ) print( "Norbert Bátfai, [email protected],\n\n" ) print("Loading mission from %s" % missionXML_file) mission_xml = my_mission = MalmoPython.MissionSpec(mission_xml, True) my_mission.drawBlock(0, 0, 0, "lava") class Hourglass: def __init__(self, charSet): self.charSet = charSet self.index = 0 def cursor(self): self.index = (self.index + 1) % len(self.charSet) return self.charSet[self.index] hg = Hourglass('|/-\|')
num_reps = 30000 for iRepeat in range(num_reps): # Set up a recording my_mission_record = malmoutils.get_default_recording_object( agent_host, "Patch_{}".format(iRepeat + 1)) # Find the point at which to create the maze: xorg = (iRepeat % 64) * 16 zorg = ((old_div(iRepeat, 64)) % 64) * 16 yorg = 200 + ((old_div(iRepeat, (64 * 64))) % 64) * 8 print("Mission " + str(iRepeat) + " --- starting at " + str(xorg) + ", " + str(yorg) + ", " + str(zorg)) # Create a mission: my_mission = MalmoPython.MissionSpec( GetMissionXML(iRepeat, xorg, yorg, zorg, iRepeat), True) max_retries = 3 for retry in range(max_retries): try: # Attempt to start the mission: agent_host.startMission(my_mission, my_client_pool, my_mission_record, 0, str(experimentID)) break except RuntimeError as e: if retry == max_retries - 1: print("Error starting mission", e) exit(1) else: time.sleep(2)
def _load_mission(self, mission_file):
    """Read ``mission_file`` and store the validated spec on the instance.

    Args:
        mission_file: path of the mission XML file to load.

    Side effects: sets ``self.mission_spec`` and logs the file name before
    loading and the mission summary afterwards.
    """
    import logging

    # NOTE(review): messages go to a module-named logger at INFO level;
    # confirm this matches the logger the rest of the module uses.
    logger = logging.getLogger(__name__)

    logger.info("Loading mission from %s", mission_file)
    # the with-block closes the file even if reading fails
    with open(mission_file, 'r') as mission_handle:
        mission_xml = mission_handle.read()
    self.mission_spec = MalmoPython.MissionSpec(mission_xml, True)
    logger.info("Loaded mission: %s", self.mission_spec.getSummary())
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ------------------------------------------------------------------------------------------------

import MalmoPython

# Build a mission purely through MissionSpec API calls, starting from the
# default-constructed spec (no XML string is passed to the constructor).
my_mission = MalmoPython.MissionSpec()
my_mission.timeLimitInSeconds(10)
# NOTE(review): this drawBlock call is issued before createDefaultTerrain();
# whether call order matters for the generated mission is not visible here.
my_mission.drawBlock(19, 0, 19, "redstone_block")
my_mission.createDefaultTerrain()
my_mission.setTimeOfDay(6000, False)
# Drawing primitives: a cuboid, a single item, a sphere and a line.
# Coordinates are presumably x, y, z (two corner/end points for the cuboid
# and the line, centre plus radius for the sphere) -- confirm against the
# MissionSpec API reference.
my_mission.drawCuboid(50, 0, 50, 100, 10, 100, "redstone_block")
my_mission.drawItem(3, 0, 2, "diamond_pickaxe")
my_mission.drawSphere(50, 10, 50, 10, "ice")
my_mission.drawLine(50, 20, 50, 100, 20, 100, "redstone_block")
# Agent start and end positions; the end position matches the block drawn at
# (19, 0, 19) above, offset by 0.5. The trailing 1.0 on endAt is presumably a
# distance tolerance -- confirm.
my_mission.startAt(2.5, 0.0, 2.5)
my_mission.endAt(19.5, 0.0, 19.5, 1.0)
my_mission.requestVideo(320, 240)
my_mission.setModeToCreative()
# Reward tied to the same coordinates as the end position.
my_mission.rewardForReachingPosition(19.5, 0.0, 19.5, 100, 1.1)
# Request additional observation channels.
my_mission.observeRecentCommands()
my_mission.observeHotBar()