Example #1
    def batch_and_learn(i, lock=threading.Lock()):
        """Thread target for the learning process."""
        nonlocal step, stats
        timings = prof.Timings()
        while step < flags.total_steps:
            timings.reset()
            batch, agent_state = get_batch(
                flags,
                free_queue,
                full_queue,
                buffers,
                initial_agent_state_buffers,
                timings,
            )
            stats = learn(flags, model, learner_model, batch, agent_state,
                          optimizer, scheduler)
            timings.time("learn")
            with lock:
                to_log = dict(step=step)
                to_log.update({k: stats[k] for k in stat_keys})
                plogger.log(to_log)
                step += T * B

        if i == 0:
            logging.info("Batch and learn: %s", timings.summary())
Example #2
    def batch_and_learn(i, lock=threading.Lock()):
        """Thread target for the learning process."""
        nonlocal step, stats
        timings = prof.Timings()
        while step < flags.total_steps:
            timings.reset()
            batch, agent_state = get_batch(
                flags,
                free_queue,
                full_queue,
                buffers,
                initial_agent_state_buffers,
                timings,
            )
            stats = learn(flags, model, learner_model, batch, agent_state,
                          optimizer, scheduler)
            timings.time("learn")
            with lock:
                # Step-wise learning rate annealing.
                # TODO: unclear how to perform the annealing here exactly;
                # we don't have access to the train_step.
                if flags.scheduler in ['cosine', 'constant', 'dev_perf']:
                    # linear warmup stage
                    if step < flags.warmup_step:
                        curr_lr = flags.lr * step / flags.warmup_step
                        optimizer.param_groups[0]['lr'] = curr_lr
                    else:
                        if flags.scheduler == 'cosine':
                            scheduler.step()
                elif flags.scheduler == 'inv_sqrt':
                    scheduler.step()

                to_log = dict(step=step)
                to_log.update({k: stats[k] for k in stat_keys})
                plogger.log(to_log)
                step += T * B

        if i == 0:
            logging.info("Batch and learn: %s", timings.summary())
Example #3
def act(
    flags,
    actor_index: int,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    model: torch.nn.Module,
    buffers: Buffers,
    initial_agent_state_buffers,
):
    try:
        logging.info("Actor %i started.", actor_index)
        timings = prof.Timings()  # Keep track of how fast things are.

        gym_env = create_env(flags)
        seed = actor_index ^ int.from_bytes(os.urandom(4), byteorder="little")
        gym_env.seed(seed)
        env = environment.Environment(gym_env)
        env_output = env.initial()
        agent_state = model.initial_state(batch_size=1)
        agent_output, unused_state = model(env_output, agent_state)
        while True:
            index = free_queue.get()
            if index is None:
                break

            # Write old rollout end.
            for key in env_output:
                buffers[key][index][0, ...] = env_output[key]
            for key in agent_output:
                buffers[key][index][0, ...] = agent_output[key]
            for i, tensor in enumerate(agent_state):
                initial_agent_state_buffers[index][i][...] = tensor

            # Do new rollout.
            for t in range(flags.unroll_length):
                timings.reset()

                with torch.no_grad():
                    agent_output, agent_state = model(env_output, agent_state)

                timings.time("model")

                env_output = env.step(agent_output["action"])

                timings.time("step")

                for key in env_output:
                    buffers[key][index][t + 1, ...] = env_output[key]
                for key in agent_output:
                    buffers[key][index][t + 1, ...] = agent_output[key]

                timings.time("write")
            full_queue.put(index)

        if actor_index == 0:
            logging.info("Actor %i: %s", actor_index, timings.summary())

    except KeyboardInterrupt:
        pass  # Return silently.
    except Exception as e:
        logging.error("Exception in worker process %i", actor_index)
        traceback.print_exc()
        print()
        raise e
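
A sketch of how actors like this are typically launched in the monobeast style; flags.num_actors and flags.num_buffers are assumed names:

import torch.multiprocessing as mp

ctx = mp.get_context("fork")
free_queue = ctx.SimpleQueue()
full_queue = ctx.SimpleQueue()

actor_processes = []
for i in range(flags.num_actors):
    actor = ctx.Process(
        target=act,
        args=(flags, i, free_queue, full_queue, model, buffers,
              initial_agent_state_buffers),
    )
    actor.start()
    actor_processes.append(actor)

# Hand every rollout slot to the actors; putting None later tells them to exit.
for m in range(flags.num_buffers):
    free_queue.put(m)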
Example #4
def act(
    flags,
    actor_index: int,
    free_queue: mp.SimpleQueue,
    full_queue: mp.SimpleQueue,
    model: torch.nn.Module,
    buffers: Buffers,
    initial_agent_state_buffers,
):
    try:
        logging.info("Actor %i started.", actor_index)
        timings = prof.Timings()  # Keep track of how fast things are.

        gym_env = create_env(flags)
        seed = actor_index ^ int.from_bytes(os.urandom(4), byteorder="little")
        gym_env.seed(seed)
        env = environment.Environment(gym_env)
        env_output = env.initial()

        agent_state = model.initial_state(batch_size=1)
        mems, mem_padding = None, None
        agent_output, unused_state, mems, mem_padding, _ = model(
            env_output, agent_state, mems, mem_padding)
        while True:
            index = free_queue.get()
            if index is None:
                break

            # No need to reset 'done' to False here: once 'done' is observed,
            # an extra step is taken out of the terminal state after the
            # rollout loop below.

            # Write old rollout end.
            for key in env_output:
                buffers[key][index][0, ...] = env_output[key]
            for key in agent_output:
                buffers[key][index][0, ...] = agent_output[key]
            for i, tensor in enumerate(agent_state):
                initial_agent_state_buffers[index][i][...] = tensor

            # Do one new rollout of up to flags.unroll_length steps.
            t = 0
            while t < flags.unroll_length and not env_output['done'].item():
                timings.reset()

                # The mems reset that used to live here was moved below the
                # loop, since env_output['done'] can never be True at this point.

                with torch.no_grad():
                    agent_output, agent_state, mems, mem_padding, _ = model(
                        env_output, agent_state, mems, mem_padding)

                timings.time("model")

                # TODO(Shakti): add action repeat?
                env_output = env.step(agent_output["action"])

                timings.time("step")

                for key in env_output:
                    buffers[key][index][t + 1, ...] = env_output[key]
                for key in agent_output:
                    buffers[key][index][t + 1, ...] = agent_output[key]

                timings.time("write")
                t += 1

            if env_output['done'].item():
                mems = None
                # Take an arbitrary step to reset the environment.
                env_output = env.step(torch.tensor([2]))

            if t != flags.unroll_length:
                # Mark the unused tail of the rollout as done so the learner
                # can mask it out. TODO(Shakti): please double-check this.
                buffers['done'][index][t + 1:] = torch.tensor(
                    [True]).repeat(flags.unroll_length - t)

            full_queue.put(index)

        if actor_index == 0:
            logging.info("Actor %i: %s", actor_index, timings.summary())

    except KeyboardInterrupt:
        pass  # Return silently.
    except Exception as e:
        logging.error("Exception in worker process %i", actor_index)
        traceback.print_exc()
        raise e
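
A self-contained check of the tail-padding logic above: when an episode ends after t steps of a rollout of length unroll_length, the remaining 'done' slots are set to True so the learner can mask them out (toy shapes, assumed for illustration):

import torch

unroll_length, t = 8, 3
done = torch.zeros(unroll_length + 1, dtype=torch.bool)
done[t + 1:] = torch.tensor([True]).repeat(unroll_length - t)
print(done)
# tensor([False, False, False, False,  True,  True,  True,  True,  True])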