Example #1
File: train.py  Project: shounin1/minerl
def main():
    """
    This function will be called for the training phase.
    """
    # How to sample minerl data is documented here:
    # http://minerl.io/docs/tutorials/data_sampling.html
    data = minerl.data.make(MINERL_GYM_ENV, data_dir=MINERL_DATA_ROOT)
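    # A minimal, illustrative sketch of sampling the demonstration data described in the
    # tutorial linked above (batch size / sequence length values here are arbitrary):
    #
    #     for current_state, action, reward, next_state, done in data.batch_iter(
    #             batch_size=4, num_epochs=1, seq_len=32):
    #         pass  # replace with your own update step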

    # Sample code for illustration, add your training code below
    env = gym.make(MINERL_GYM_ENV)

    #     actions = [env.action_space.sample() for _ in range(10)] # Just doing 10 samples in this example
    #     xposes = []
    #     for _ in range(1):
    #         obs = env.reset()
    #         done = False
    #         netr = 0

    #         # Limiting our code to 1024 steps in this example; you can do "while not done" to run till the end
    #         while not done:

    # To get a better view of your training phase, it is suggested
    # to register progress continuously, e.g. when 54% is completed:
    # aicrowd_helper.register_progress(0.54)

    # To fetch the latest information from the instance manager, run the lines below whenever you want to know the state:
    #>> parser.update_information()
    #>> print(parser.payload)
    # .payload: provides the AIcrowd-generated JSON
    # Example: {'state': 'RUNNING', 'score': {'score': 0.0, 'score_secondary': 0.0}, 'instances': {'1': {'totalNumberSteps': 2001, 'totalNumberEpisodes': 0, 'currentEnvironment': 'MineRLObtainDiamond-v0', 'state': 'IN_PROGRESS', 'episodes': [{'numTicks': 2001, 'environment': 'MineRLObtainDiamond-v0', 'rewards': 0.0, 'state': 'IN_PROGRESS'}], 'score': {'score': 0.0, 'score_secondary': 0.0}}}}
    # .current_state: provides in-depth state information, available as a dictionary (key: instance id)

    # Save trained model to train/ directory
    # Training 100% Completed
    aicrowd_helper.register_progress(1)
Example #2
def main():
    """
    This function will be called for the training phase.
    """
    TRAINING_EXPERIMENT.run(
        config_updates={
            'data_root': MINERL_DATA_ROOT,
            'task_name': MINERL_GYM_ENV,
            'save_location': "train"
        })
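    # `config_updates` matches the signature of sacred's `Experiment.run`. If
    # TRAINING_EXPERIMENT is a sacred Experiment (an assumption; it is defined
    # elsewhere in this project), its definition could look roughly like this
    # hypothetical sketch:
    #
    #     from sacred import Experiment
    #     TRAINING_EXPERIMENT = Experiment("minerl_training")
    #
    #     @TRAINING_EXPERIMENT.main
    #     def train(data_root, task_name, save_location):
    #         ...  # build the dataset from data_root and save weights under save_location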
    aicrowd_helper.register_progress(1)
Example #3
def main():
    """
    This function will be called for the training phase.
    """
    global epsilon
    global memory
    # How to sample minerl data is documented here:
    # http://minerl.io/docs/tutorials/data_sampling.html
    data = minerl.data.make(MINERL_GYM_ENV, data_dir=MINERL_DATA_ROOT)

    # Sample code for illustration, add your training code below
    env = gym.make(MINERL_GYM_ENV)

    env.make_interactive(port=6666, realtime=True)
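    # make_interactive exposes the environment on the given port so a Minecraft
    # client can connect (e.g. via `python -m minerl.interactor 6666`) and watch
    # the agent live; it is optional and mainly useful for local debugging.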

    aicrowd_helper.training_start()
    episodes = 1024
    trajectory = data.load_data("v3_excellent_pluot_behemoth-4_3461-4804")
    for episode in range(episodes):
        obs = env.reset()
        done = False
        netr = 0

        with alive_bar(title=f"episode: {episode}") as bar:
            bar.text("replaying trajectory")
            for state, action, reward, next_state, done in trajectory:
                obs, reward, done, info = env.step(action)
                bar()
            i = 0
            bar.text("testing inputs")
            while not done:
                print(i % 64)
                action = env.action_space.noop()
                vec = np.zeros((64,))
                vec[i % 64] = -0.5
                action["vector"] = vec
                obs, reward, done, info = env.step(action)
                netr += reward
                bar()
                i += 1

        aicrowd_helper.register_progress(episode / episodes)

    # Save trained model to train/ directory
    # Training 100% Completed
    aicrowd_helper.register_progress(1)
    aicrowd_helper.training_end()
    env.close()
Example #4
def main():
    """
    This function will be called for the training phase.

    **IMPORTANT NOTICE**:
    The trained weights in the `train/` directory of this repository were generated by the `mod/dqn_family.py::main` entry point,
    not by this script.
    I have not checked whether this script (`train.py`) works on the MineRL Competition's submission system.
    (In Round 1, participants submit pre-trained agents.
    You have to make your training script work on the competition submission system in Round 2.)

    For details on the options of `dqn_family_main` called below, see the "README#How to Train Baseline Agent on your own" section.
    """
    dqn_family_main()

    # Training 100% Completed
    aicrowd_helper.register_progress(1)
Example #5
def main():
    """
    This function will be called for the training phase.
    """
    # How to sample minerl data is documented here:
    # http://minerl.io/docs/tutorials/data_sampling.html
    data = minerl.data.make(MINERL_GYM_ENV, data_dir=MINERL_DATA_ROOT)

    # Sample code for illustration, add your training code below
    env = gym.make(MINERL_GYM_ENV)

    # As an example, let's just run one episode of MineRL for training
    obs = env.reset()
    done = False
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
        # Do your training here

        # To get a better view of your training phase, it is suggested
        # to register progress continuously, e.g. when 54% is completed:
        # aicrowd_helper.register_progress(0.54)

        # To fetch the latest information from the instance manager, run the lines below whenever you want to know the state:
        #>> parser.update_information()
        #>> print(parser.payload)
        # .payload: provides the AIcrowd-generated JSON
        # Example: {'state': 'RUNNING', 'score': {'score': 0.0, 'score_secondary': 0.0}, 'instances': {'1': {'totalNumberSteps': 2001, 'totalNumberEpisodes': 0, 'currentEnvironment': 'MineRLObtainDiamond-v0', 'state': 'IN_PROGRESS', 'episodes': [{'numTicks': 2001, 'environment': 'MineRLObtainDiamond-v0', 'rewards': 0.0, 'state': 'IN_PROGRESS'}], 'score': {'score': 0.0, 'score_secondary': 0.0}}}}
        # .current_state: provides in-depth state information, available as a dictionary (key: instance id)

    # Save trained model to train/ directory
    # For a demonstration, we save some dummy data.
    # NOTE: During Round 1 submission you upload trained agents as part of the git repository.
    #       The training code is only run for 5 minutes (i.e. no proper training), so you might
    #       want to avoid overwriting any existing files here!
    #       Remember to enable it for Round 2 submission, though!
    np.save("./train/parameters.npy", np.random.random((10,)))

    # Training 100% Completed
    aicrowd_helper.register_progress(1)
    env.close()
Example #6
def main():
    """
    This function will be called for the training phase.
    """
    global epsilon
    global memory
    # How to sample minerl data is documented here:
    # http://minerl.io/docs/tutorials/data_sampling.html
    data = minerl.data.make(MINERL_GYM_ENV, data_dir=MINERL_DATA_ROOT)

    # Sample code for illustration, add your training code below
    env = gym.make(MINERL_GYM_ENV)
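    # `model` and `model_target` are defined elsewhere in this file. Given how they
    # are called below (two inputs: a 64x64x3 "pov" image and a 64-dim "vector",
    # one 64-dim output), a compatible network could hypothetically be built like
    # this (a sketch only, not the author's actual architecture):
    #
    #     from tensorflow import keras
    #     pov_in = keras.Input(shape=(64, 64, 3))
    #     vec_in = keras.Input(shape=(64,))
    #     x = keras.layers.Conv2D(32, 4, strides=2, activation="relu")(pov_in)
    #     x = keras.layers.Flatten()(x)
    #     x = keras.layers.Concatenate()([x, vec_in])
    #     x = keras.layers.Dense(256, activation="relu")(x)
    #     out = keras.layers.Dense(64)(x)
    #     model = keras.Model([pov_in, vec_in], out)
    #     model.compile(optimizer="adam", loss="mse")
    #     model_target = keras.models.clone_model(model)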

    # pre train
    if (checkpoint_dir / "pretrain.h5").exists():
        print("Loading pretrain weights")
        model.load_weights(checkpoint_dir / "pretrain.h5")
    else:
        with alive_bar(title="pretrain", calibrate=120) as bar:
            for current_state, action, reward, next_state, done in data.batch_iter(batch_size=2, num_epochs=5, seq_len=32):
                loss = model.train_on_batch([current_state["pov"].reshape(-1, 64, 64, 3), current_state["vector"].reshape(-1, 64)], action["vector"].reshape(-1, 64))
                bar.text(f"loss: {loss}")
                bar()
        model.save_weights(checkpoint_dir / "pretrain.h5")
    model_target.set_weights(model.get_weights())

    env.make_interactive(port=6666)

    aicrowd_helper.training_start()
    frame_count = 0
    episodes = 1024
    for episode in range(episodes):
        if (checkpoint_dir / f"episode-{episode}.h5").exists():
            if not (checkpoint_dir / f"episode-{episode + 1}.h5").exists():
                model.load_weights(checkpoint_dir / f"episode-{episode}.h5")
            if epsilon > epsilon_min:
                epsilon -= (epsilon_start - epsilon_min) / explore_ts
            frame_count += 6000
            continue

        obs = env.reset()
        done = False
        netr = 0

        epoch_loss = []
        with alive_bar(title=f"episode: {episode}") as bar:
            while not done:
                explore = np.random.rand() < epsilon
                if explore:
                    bar.text("perform action: explore")
                    action = env.action_space.sample()
                else:
                    bar.text("perform action: predict")
                    action = env.action_space.noop()
                    action["vector"] = model.predict([obs["pov"].reshape(-1, 64, 64, 3), obs["vector"].reshape(-1, 64)])[0]
                new_obs, reward, done, info = env.step(action)
                netr += reward

                memory.append((obs, action, reward, new_obs, done))
                # Make sure we restrict memory size to specified limit
                if len(memory) > memory_size:
                    memory.pop(0)

                if frame_count % train_interval == 0:
                    bar.text("training: build replay")
                    replay = random.sample(memory, min(batch_size, len(memory)))
                    states_pov = np.array([a[0]["pov"] for a in replay]).reshape(-1, 64, 64, 3)
                    states_vector = np.array([a[0]["vector"] for a in replay]).reshape(-1, 64)
                    # new_states_pov = np.array([a[3]["pov"] for a in replay]).reshape(-1, 64, 64, 3)
                    # new_states_vector = np.array([a[3]["vector"] for a in replay]).reshape(-1, 64)

                    # Predict the expected utility of current state and new state
                    bar.text("training: predict Q")
                    Q = model_target.predict([states_pov, states_vector])
                    Q_new = [a[2] for a in replay] + gamma * tf.reduce_max(
                        Q, axis=1
                    )
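                    # NOTE: this bootstraps from the Q-values of the *current* states;
                    # standard DQN would evaluate the target network on the next states
                    # (see the commented-out new_states_* arrays above).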

                    # masks = tf.one_hot([a[1]["vector"] for a in replay], 64)

                    bar.text("training: backprop")
                    with tf.GradientTape() as tape:
                        # Train the model on the states and updated Q-values
                        q_values = model([states_pov, states_vector])

                        # Apply the masks to the Q-values to get the Q-value for action taken
                        # q_action = tf.reduce_sum(tf.multiply(q_values, masks), axis=1)
                        q_action = tf.reduce_sum(q_values, axis=1)
                        # Calculate loss between new Q-value and old Q-value
                        loss = loss_function(Q_new, q_action)
                        grads = tape.gradient(loss, model.trainable_variables)
                        optimizer.apply_gradients(zip(grads, model.trainable_variables))
                    epoch_loss.append(loss)

                if epsilon > epsilon_min:
                    epsilon -= (epsilon_start - epsilon_min) / explore_ts
                print("explore:", explore, "net reward:", netr, "loss:", loss, "epsilon:", epsilon)
                bar()
                obs = new_obs
                if frame_count % target_update_interval == 0:
                    print("updated target model")
                    model_target.set_weights(model.get_weights())
                frame_count += 1
        model.save_weights(checkpoint_dir / f"episode-{episode}.h5")

        aicrowd_helper.register_progress(episode / episodes)

    # Save trained model to train/ directory
    # Training 100% Completed
    aicrowd_helper.register_progress(1)
    aicrowd_helper.training_end()
    env.close()
Example #7
def main():
    """
    This function will be called for the training phase.
    """
    # How to sample minerl data is documented here:
    # http://minerl.io/docs/tutorials/data_sampling.html
    data = minerl.data.make(MINERL_GYM_ENV, data_dir=MINERL_DATA_ROOT)

    os.environ['KMEANS_CACHE'] = './train/kmeans_cache'
    os.environ['BOUNDARY_CACHE'] = './train/boundary_cache'
    os.environ['MINERL_DATA_ROOT'] = './data/'

    TRAINING_STEPS = 4000000

    mod.sqil.main(argv=[
        '--env',
        'MineRLObtainDiamondVectorObf-v0',
        '--outdir',
        './train/results',
        '--gpu',
        '-1',  # Set to 0 if you want to use the GPU.
        '--steps',
        str(TRAINING_STEPS),
        '--eval-interval',
        '2500',
        '--eval-n-runs',
        '20',
        '--remove-timestamp',  # save to outdir/latest
        '--dual-kmeans',
        '--kmeans-n-clusters-vc',
        '60',
        '--option-n-groups',
        '10'
    ])
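    # mod.sqil is a project-local module; judging by the name it implements SQIL
    # (Soft Q Imitation Learning), where demonstration transitions are replayed with
    # reward 1 and agent transitions with reward 0 inside an off-policy Q-learning
    # loop. The flags above are specific to this project's CLI.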

    # Sample code for illustration, add your training code below
    # env = gym.make(MINERL_GYM_ENV)

    #     actions = [env.action_space.sample() for _ in range(10)] # Just doing 10 samples in this example
    #     xposes = []
    #     for _ in range(1):
    #         obs = env.reset()
    #         done = False
    #         netr = 0

    #         # Limiting our code to 1024 steps in this example; you can do "while not done" to run till the end
    #         while not done:

    # To get a better view of your training phase, it is suggested
    # to register progress continuously, e.g. when 54% is completed:
    # aicrowd_helper.register_progress(0.54)

    # To fetch the latest information from the instance manager, run the lines below whenever you want to know the state:
    #>> parser.update_information()
    #>> print(parser.payload)
    # .payload: provides the AIcrowd-generated JSON
    # Example: {'state': 'RUNNING', 'score': {'score': 0.0, 'score_secondary': 0.0}, 'instances': {'1': {'totalNumberSteps': 2001, 'totalNumberEpisodes': 0, 'currentEnvironment': 'MineRLObtainDiamond-v0', 'state': 'IN_PROGRESS', 'episodes': [{'numTicks': 2001, 'environment': 'MineRLObtainDiamond-v0', 'rewards': 0.0, 'state': 'IN_PROGRESS'}], 'score': {'score': 0.0, 'score_secondary': 0.0}}}}
    # .current_state: provides in-depth state information, available as a dictionary (key: instance id)

    # Save trained model to train/ directory
    # Training 100% Completed
    aicrowd_helper.register_progress(1)
Example #8
File: train.py  Project: takatoy/mine-rl
def main():
    writer = SummaryWriter()

    env = gym.make('MineRLObtainDiamondDense-v0')
    if FRAME_SKIP > 0:
        env = FrameSkip(env, FRAME_SKIP)
    env = ObsWrapper(env)
    env = MoveAxisWrapper(env, -1, 0)
    env = CombineActionWrapper(env)
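    # FrameSkip / ObsWrapper / MoveAxisWrapper / CombineActionWrapper are
    # project-specific wrappers defined elsewhere. As a rough, hypothetical
    # illustration, a frame-skip wrapper usually looks like this:
    #
    #     class FrameSkip(gym.Wrapper):
    #         def __init__(self, env, skip):
    #             super().__init__(env)
    #             self.skip = skip
    #
    #         def step(self, action):
    #             total_reward, done, info = 0.0, False, {}
    #             for _ in range(self.skip):
    #                 obs, reward, done, info = self.env.step(action)
    #                 total_reward += reward
    #                 if done:
    #                     break
    #             return obs, total_reward, done, info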

    agent = Agent(env.observation_space, env.action_space)
    data = minerl.data.make('MineRLTreechop-v0', data_dir=MINERL_DATA_ROOT)
    data_source = data.sarsd_iter(num_epochs=-1, max_sequence_len=DATA_BATCH_SIZE)

    # data_2 = minerl.data.make('MineRLObtainDiamond-v0', data_dir=MINERL_DATA_ROOT)
    # data_2_source = data.sarsd_iter(num_epochs=-1, max_sequence_len=128)

    # behavioral cloning
    train_from_expert(agent, data_source)

    net_steps = 0
    n_episode = 0
    while True:
        obs = env.reset()
        done = False
        netr = 0
        net_bonus_r = 0
        nobs = None
        step = 0
        while not done:
            action = agent.act(obs)
            nobs, reward, done, info = env.step(action)
            netr += reward
            reward += agent.bonus_reward(obs, action, nobs)
            net_bonus_r += reward
            agent.add_data(obs, action, reward, nobs, done)
            obs = nobs

            # To get a better view of your training phase, it is suggested
            # to register progress continuously, e.g. when 54% is completed:
            # aicrowd_helper.register_progress(0.54)

            # To fetch the latest information from the instance manager, run the lines below whenever you want to know the state:
            #>> parser.update_information()
            #>> print(parser.payload)
            # .payload: provides the AIcrowd-generated JSON
            # Example: {'state': 'RUNNING', 'score': {'score': 0.0, 'score_secondary': 0.0}, 'instances': {'1': {'totalNumberSteps': 2001, 'totalNumberEpisodes': 0, 'currentEnvironment': 'MineRLObtainDiamond-v0', 'state': 'IN_PROGRESS', 'episodes': [{'numTicks': 2001, 'environment': 'MineRLObtainDiamond-v0', 'rewards': 0.0, 'state': 'IN_PROGRESS'}], 'score': {'score': 0.0, 'score_secondary': 0.0}}}}
            # .current_state: provides in-depth state information, available as a dictionary (key: instance id)

            step += 1
            net_steps += 1

            if (TRAIN_INTERVAL != 0 and step % TRAIN_INTERVAL == 0) or done:
                total_discrim_loss = 0.0
                total_value = total_ppo_loss = total_value_loss = total_entropy = 0
                n_epoch = 0
                while not agent.is_memory_empty():
                    s, a, _, _, _ = data_source.__next__()
                    s = data_state_wrapper(s)
                    a = data_action_wrapper(a)
                    total_discrim_loss += agent.train_discriminator(s, a)
                    value, ppo_loss, value_loss, entropy = agent.train_policy()

                    total_value += value
                    total_ppo_loss += ppo_loss
                    total_value_loss += value_loss
                    total_entropy += entropy
                    n_epoch += 1

                writer.add_scalar('Train/Value', total_value / n_epoch, net_steps)
                writer.add_scalar('Train/PolicyLoss', total_ppo_loss / n_epoch, net_steps)
                writer.add_scalar('Train/ValueLoss', total_value_loss / n_epoch, net_steps)
                writer.add_scalar('Train/Entropy', total_entropy / n_epoch, net_steps)
                writer.add_scalar('Train/DiscriminatorLoss', total_discrim_loss / n_epoch, net_steps)
                agent.save_model()

        writer.add_scalar('Reward/ExternalReward', netr, n_episode)
        writer.add_scalar('Reward/TotalReward', net_bonus_r, n_episode)
        n_episode += 1

        agent.save_model()

    # NOTE: the `while True:` loop above never breaks, so this call and the cleanup below are unreachable as written.
    agent.save_model()

    aicrowd_helper.register_progress(1)
    env.close()