def main(): """ This function will be called for training phase. """ # How to sample minerl data is document here: # http://minerl.io/docs/tutorials/data_sampling.html data = minerl.data.make(MINERL_GYM_ENV, data_dir=MINERL_DATA_ROOT) # Sample code for illustration, add your training code below env = gym.make(MINERL_GYM_ENV) # actions = [env.action_space.sample() for _ in range(10)] # Just doing 10 samples in this example # xposes = [] # for _ in range(1): # obs = env.reset() # done = False # netr = 0 # # Limiting our code to 1024 steps in this example, you can do "while not done" to run till end # while not done: # To get better view in your training phase, it is suggested # to register progress continuously, example when 54% completed # aicrowd_helper.register_progress(0.54) # To fetch latest information from instance manager, you can run below when you want to know the state #>> parser.update_information() #>> print(parser.payload) # .payload: provide AIcrowd generated json # Example: {'state': 'RUNNING', 'score': {'score': 0.0, 'score_secondary': 0.0}, 'instances': {'1': {'totalNumberSteps': 2001, 'totalNumberEpisodes': 0, 'currentEnvironment': 'MineRLObtainDiamond-v0', 'state': 'IN_PROGRESS', 'episodes': [{'numTicks': 2001, 'environment': 'MineRLObtainDiamond-v0', 'rewards': 0.0, 'state': 'IN_PROGRESS'}], 'score': {'score': 0.0, 'score_secondary': 0.0}}}} # .current_state: provide indepth state information avaiable as dictionary (key: instance id) # Save trained model to train/ directory # Training 100% Completed aicrowd_helper.register_progress(1)
def main(): """ This function will be called for training phase. """ TRAINING_EXPERIMENT.run( config_updates={ 'data_root': MINERL_DATA_ROOT, 'task_name': MINERL_GYM_ENV, 'save_location': "train" }) aicrowd_helper.register_progress(1)
def main():
    """
    This function will be called for the training phase.
    """
    global epsilon
    global memory

    # How to sample MineRL data is documented here:
    # http://minerl.io/docs/tutorials/data_sampling.html
    data = minerl.data.make(MINERL_GYM_ENV, data_dir=MINERL_DATA_ROOT)

    # Sample code for illustration, add your training code below
    env = gym.make(MINERL_GYM_ENV)
    env.make_interactive(port=6666, realtime=True)

    aicrowd_helper.training_start()

    episodes = 1024
    for episode in range(episodes):
        # load_data returns a generator, so re-create it every episode instead of
        # iterating an already-exhausted one.
        trajectory = data.load_data("v3_excellent_pluot_behemoth-4_3461-4804")

        obs = env.reset()
        done = False
        netr = 0

        with alive_bar(title=f"episode: {episode}") as bar:
            # Replay the recorded human actions in the live environment.
            bar.text("replaying trajectory")
            for state, action, reward, next_state, done in trajectory:
                obs, reward, done, info = env.step(action)
                bar()

            # Then probe each dimension of the 64-d obfuscated action vector in turn.
            i = 0
            bar.text("testing inputs")
            while not done:
                print(i % 64)
                action = env.action_space.noop()
                vec = np.zeros((64,))
                vec[i % 64] = -0.5
                action["vector"] = vec
                obs, reward, done, info = env.step(action)
                netr += reward
                bar()
                i += 1

        aicrowd_helper.register_progress(episode / episodes)

    # Save the trained model to the train/ directory

    # Training 100% completed
    aicrowd_helper.register_progress(1)
    aicrowd_helper.training_end()
    env.close()
def main(): """ This function will be called for training phase. **IMPORTANT NOTICE**: The trained weights in `train/` directory of this repository were generated by `mod/dqn_family.py::main` entry point, not by this script. I've not checked if this script (`train.py`) could work on the MineRL Competition's submission system. (On the Round 1, participants are to submit pre-trained agents. You have to make your training script work on the competition submission system on the Round 2.) For the detail of the options of `dqn_family_main` called below, see "README#How to Train Baseline Agent on you own" section. """ dqn_family_main() # Training 100% Completed aicrowd_helper.register_progress(1)
def main(): """ This function will be called for training phase. """ # How to sample minerl data is document here: # http://minerl.io/docs/tutorials/data_sampling.html data = minerl.data.make(MINERL_GYM_ENV, data_dir=MINERL_DATA_ROOT) # Sample code for illustration, add your training code below env = gym.make(MINERL_GYM_ENV) # For an example, lets just run one episode of MineRL for training obs = env.reset() done = False while not done: obs, reward, done, info = env.step(env.action_space.sample()) # Do your training here # To get better view in your training phase, it is suggested # to register progress continuously, example when 54% completed # aicrowd_helper.register_progress(0.54) # To fetch latest information from instance manager, you can run below when you want to know the state #>> parser.update_information() #>> print(parser.payload) # .payload: provide AIcrowd generated json # Example: {'state': 'RUNNING', 'score': {'score': 0.0, 'score_secondary': 0.0}, 'instances': {'1': {'totalNumberSteps': 2001, 'totalNumberEpisodes': 0, 'currentEnvironment': 'MineRLObtainDiamond-v0', 'state': 'IN_PROGRESS', 'episodes': [{'numTicks': 2001, 'environment': 'MineRLObtainDiamond-v0', 'rewards': 0.0, 'state': 'IN_PROGRESS'}], 'score': {'score': 0.0, 'score_secondary': 0.0}}}} # .current_state: provide indepth state information avaiable as dictionary (key: instance id) # Save trained model to train/ directory # For a demonstration, we save some dummy data. # NOTE: During Round 1 submission you upload trained agents as part of the git repository. # The training code is only ran for 5 minutes (i.e. no proper training), so you might # want to avoid overwriting any existing files here! # Remember to enable it for Round 2 submission, though! np.save("./train/parameters.npy", np.random.random((10,))) # Training 100% Completed aicrowd_helper.register_progress(1) env.close()
def main():
    """
    This function will be called for the training phase.
    """
    global epsilon
    global memory

    # How to sample MineRL data is documented here:
    # http://minerl.io/docs/tutorials/data_sampling.html
    data = minerl.data.make(MINERL_GYM_ENV, data_dir=MINERL_DATA_ROOT)

    # Sample code for illustration, add your training code below
    env = gym.make(MINERL_GYM_ENV)

    # Pre-train the network on the demonstration data.
    if (checkpoint_dir / "pretrain.h5").exists():
        print("Loading pretrain weights")
        model.load_weights(checkpoint_dir / "pretrain.h5")
    else:
        with alive_bar(title="pretrain", calibrate=120) as bar:
            for current_state, action, reward, next_state, done in data.batch_iter(batch_size=2, num_epochs=5, seq_len=32):
                loss = model.train_on_batch(
                    [current_state["pov"].reshape(-1, 64, 64, 3),
                     current_state["vector"].reshape(-1, 64)],
                    action["vector"].reshape(-1, 64))
                bar.text(f"loss: {loss}")
                bar()
        model.save_weights(checkpoint_dir / "pretrain.h5")
    model_target.set_weights(model.get_weights())

    env.make_interactive(port=6666)
    aicrowd_helper.training_start()

    frame_count = 0
    episodes = 1024
    for episode in range(episodes):
        # Resume from the latest checkpoint: skip episodes that have already been trained.
        if (checkpoint_dir / f"episode-{episode}.h5").exists():
            if not (checkpoint_dir / f"episode-{episode + 1}.h5").exists():
                model.load_weights(checkpoint_dir / f"episode-{episode}.h5")
            if epsilon > epsilon_min:
                epsilon -= (epsilon_start - epsilon_min) / explore_ts
            frame_count += 6000
            continue

        obs = env.reset()
        done = False
        netr = 0
        epoch_loss = []

        with alive_bar(title=f"episode: {episode}") as bar:
            while not done:
                # Epsilon-greedy action selection.
                explore = np.random.rand() < epsilon
                if explore:
                    bar.text("perform action: explore")
                    action = env.action_space.sample()
                else:
                    bar.text("perform action: predict")
                    action = env.action_space.noop()
                    action["vector"] = model.predict([obs["pov"].reshape(-1, 64, 64, 3),
                                                      obs["vector"].reshape(-1, 64)])[0]

                new_obs, reward, done, info = env.step(action)
                netr += reward

                memory.append((obs, action, reward, new_obs, done))
                # Make sure we restrict memory size to the specified limit
                if len(memory) > memory_size:
                    memory.pop(0)

                if frame_count % train_interval == 0:
                    bar.text("training: build replay")
                    replay = random.sample(memory, min(batch_size, len(memory)))
                    states_pov = np.array([a[0]["pov"] for a in replay]).reshape(-1, 64, 64, 3)
                    states_vector = np.array([a[0]["vector"] for a in replay]).reshape(-1, 64)
                    # new_states_pov = np.array([a[3]["pov"] for a in replay]).reshape(-1, 64, 64, 3)
                    # new_states_vector = np.array([a[3]["vector"] for a in replay]).reshape(-1, 64)

                    # Predict the expected utility of current state and new state
                    bar.text("training: predict Q")
                    Q = model_target.predict([states_pov, states_vector])
                    Q_new = [a[2] for a in replay] + gamma * tf.reduce_max(Q, axis=1)
                    # masks = tf.one_hot([a[1]["vector"] for a in replay], 64)

                    bar.text("training: backprop")
                    with tf.GradientTape() as tape:
                        # Train the model on the states and updated Q-values
                        q_values = model([states_pov, states_vector])
                        # Apply the masks to the Q-values to get the Q-value for the action taken
                        # q_action = tf.reduce_sum(tf.multiply(q_values, masks), axis=1)
                        q_action = tf.reduce_sum(q_values, axis=1)
                        # Calculate loss between new Q-value and old Q-value
                        loss = loss_function(Q_new, q_action)
                    grads = tape.gradient(loss, model.trainable_variables)
                    optimizer.apply_gradients(zip(grads, model.trainable_variables))
                    epoch_loss.append(loss)

                    if epsilon > epsilon_min:
                        epsilon -= (epsilon_start - epsilon_min) / explore_ts
                    print("explore:", explore, "net reward:", netr, "loss:", loss, "epsilon:", epsilon)

                bar()
                obs = new_obs

                if frame_count % target_update_interval == 0:
                    print("updated target model")
                    model_target.set_weights(model.get_weights())
                frame_count += 1

        model.save_weights(checkpoint_dir / f"episode-{episode}.h5")
        aicrowd_helper.register_progress(episode / episodes)

    # Save the trained model to the train/ directory

    # Training 100% completed
    aicrowd_helper.register_progress(1)
    aicrowd_helper.training_end()
    env.close()
def main(): """ This function will be called for training phase. """ # How to sample minerl data is document here: # http://minerl.io/docs/tutorials/data_sampling.html data = minerl.data.make(MINERL_GYM_ENV, data_dir=MINERL_DATA_ROOT) os.environ['KMEANS_CACHE'] = './train/kmeans_cache' os.environ['BOUNDARY_CACHE'] = './train/boundary_cache' os.environ['MINERL_DATA_ROOT'] = './data/' TRAINING_STEPS = 4000000 mod.sqil.main(argv=[ '--env', 'MineRLObtainDiamondVectorObf-v0', '--outdir', './train/results', '--gpu', '-1', # Need to be set 0 if you want to use GPU. '--steps', str(TRAINING_STEPS), '--eval-interval', '2500', '--eval-n-runs', '20', '--remove-timestamp', # save to outdir/latest '--dual-kmeans', '--kmeans-n-clusters-vc', '60', '--option-n-groups', '10' ]) # Sample code for illustration, add your training code below # env = gym.make(MINERL_GYM_ENV) # actions = [env.action_space.sample() for _ in range(10)] # Just doing 10 samples in this example # xposes = [] # for _ in range(1): # obs = env.reset() # done = False # netr = 0 # # Limiting our code to 1024 steps in this example, you can do "while not done" to run till end # while not done: # To get better view in your training phase, it is suggested # to register progress continuously, example when 54% completed # aicrowd_helper.register_progress(0.54) # To fetch latest information from instance manager, you can run below when you want to know the state #>> parser.update_information() #>> print(parser.payload) # .payload: provide AIcrowd generated json # Example: {'state': 'RUNNING', 'score': {'score': 0.0, 'score_secondary': 0.0}, 'instances': {'1': {'totalNumberSteps': 2001, 'totalNumberEpisodes': 0, 'currentEnvironment': 'MineRLObtainDiamond-v0', 'state': 'IN_PROGRESS', 'episodes': [{'numTicks': 2001, 'environment': 'MineRLObtainDiamond-v0', 'rewards': 0.0, 'state': 'IN_PROGRESS'}], 'score': {'score': 0.0, 'score_secondary': 0.0}}}} # .current_state: provide indepth state information avaiable as dictionary (key: instance id) # Save trained model to train/ directory # Training 100% Completed aicrowd_helper.register_progress(1)
def main():
    writer = SummaryWriter()

    env = gym.make('MineRLObtainDiamondDense-v0')
    if FRAME_SKIP > 0:
        env = FrameSkip(env, FRAME_SKIP)
    env = ObsWrapper(env)
    env = MoveAxisWrapper(env, -1, 0)
    env = CombineActionWrapper(env)

    agent = Agent(env.observation_space, env.action_space)

    data = minerl.data.make('MineRLTreechop-v0', data_dir=MINERL_DATA_ROOT)
    data_source = data.sarsd_iter(num_epochs=-1, max_sequence_len=DATA_BATCH_SIZE)
    # data_2 = minerl.data.make('MineRLObtainDiamond-v0', data_dir=MINERL_DATA_ROOT)
    # data_2_source = data.sarsd_iter(num_epochs=-1, max_sequence_len=128)

    # Behavioural cloning: warm-start the agent from the expert demonstrations.
    train_from_expert(agent, data_source)

    net_steps = 0
    n_episode = 0
    # Train until the process is stopped externally (e.g. by the submission time limit).
    while True:
        obs = env.reset()
        done = False
        netr = 0
        net_bonus_r = 0
        nobs = None
        step = 0
        while not done:
            action = agent.act(obs)
            nobs, reward, done, info = env.step(action)
            netr += reward
            reward += agent.bonus_reward(obs, action, nobs)
            net_bonus_r += reward
            agent.add_data(obs, action, reward, nobs, done)
            obs = nobs

            # To get a better view of your training phase, it is suggested
            # to register progress continuously, e.g. when 54% completed:
            # aicrowd_helper.register_progress(0.54)

            # To fetch the latest information from the instance manager, you can run the following:
            # >> parser.update_information()
            # >> print(parser.payload)
            # .payload: the AIcrowd-generated JSON
            # Example: {'state': 'RUNNING', 'score': {'score': 0.0, 'score_secondary': 0.0}, 'instances': {'1': {'totalNumberSteps': 2001, 'totalNumberEpisodes': 0, 'currentEnvironment': 'MineRLObtainDiamond-v0', 'state': 'IN_PROGRESS', 'episodes': [{'numTicks': 2001, 'environment': 'MineRLObtainDiamond-v0', 'rewards': 0.0, 'state': 'IN_PROGRESS'}], 'score': {'score': 0.0, 'score_secondary': 0.0}}}}
            # .current_state: in-depth state information, available as a dictionary (key: instance id)

            step += 1
            net_steps += 1

            if (TRAIN_INTERVAL != 0 and step % TRAIN_INTERVAL == 0) or done:
                total_discrim_loss = 0.0
                total_value = total_ppo_loss = total_value_loss = total_entropy = 0
                n_epoch = 0
                while not agent.is_memory_empty():
                    # Draw an expert batch, update the discriminator, then update the policy.
                    s, a, _, _, _ = next(data_source)
                    s = data_state_wrapper(s)
                    a = data_action_wrapper(a)
                    total_discrim_loss += agent.train_discriminator(s, a)
                    value, ppo_loss, value_loss, entropy = agent.train_policy()
                    total_value += value
                    total_ppo_loss += ppo_loss
                    total_value_loss += value_loss
                    total_entropy += entropy
                    n_epoch += 1
                # Log per-interval averages of the accumulated statistics.
                writer.add_scalar('Train/Value', total_value / n_epoch, net_steps)
                writer.add_scalar('Train/PolicyLoss', total_ppo_loss / n_epoch, net_steps)
                writer.add_scalar('Train/ValueLoss', total_value_loss / n_epoch, net_steps)
                writer.add_scalar('Train/Entropy', total_entropy / n_epoch, net_steps)
                writer.add_scalar('Train/DiscriminatorLoss', total_discrim_loss / n_epoch, net_steps)
                agent.save_model()

        writer.add_scalar('Reward/ExternalReward', netr, n_episode)
        writer.add_scalar('Reward/TotalReward', net_bonus_r, n_episode)
        n_episode += 1
        agent.save_model()

    agent.save_model()
    aicrowd_helper.register_progress(1)
    env.close()
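# FrameSkip, ObsWrapper, MoveAxisWrapper and CombineActionWrapper are defined elsewhere in
# this repository. For reference, here is a common, minimal frame-skip wrapper (an assumed
# implementation, not necessarily the one used here) that repeats each chosen action for
# `skip` frames and sums the rewards:
import gym

class FrameSkipExample(gym.Wrapper):
    def __init__(self, env, skip=4):
        super().__init__(env)
        self._skip = skip

    def step(self, action):
        total_reward = 0.0
        obs, done, info = None, False, {}
        for _ in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            total_reward += reward
            if done:
                break
        return obs, total_reward, done, info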