class Testor:
    def __init__(self, model_dict, idx, num_channels=3, num_actions=19):
        import gym
        import minerl

        self.testor_idx = idx
        self.env = gym.make(ENV_NAME)
        self.port_number = 12340 + self.testor_idx
        print("testor environment %d initialized successfully" % self.testor_idx)
        self.env.make_interactive(port=self.port_number, realtime=False)

        # Evaluation copy of the learner network, loaded from the supplied state dict.
        self.testor_network = DQN(num_channels, num_actions).cuda()
        self.testor_network.load_state_dict(model_dict)
        print("testor network %d initialized successfully" % self.testor_idx)

        self.writer = SummaryWriter(f'runs/apex/test/testor{self.testor_idx}')
        self.max_epi = 100

    def explore(self):
        for num_epi in range(self.max_epi):
            obs = self.env.reset()
            state = converter(ENV_NAME, obs).cuda()
            state = state.float()
            done = False
            total_reward = 0
            steps = 0
            total_steps = 0
            while not done:
                steps += 1
                total_steps += 1
                # Greedy evaluation: always pick the argmax action.
                action_tensor = self.testor_network.forward(state)
                print(action_tensor)
                action_index = torch.argmax(action_tensor).item()
                print(action_index)
                action = make_19action(self.env, action_index)
                # print(action)
                obs_prime, reward, done, info = self.env.step(action)
                total_reward += reward
                state_prime = converter(ENV_NAME, obs_prime).cuda()
                state = state_prime
                if done:
                    print("%d episode is done" % num_epi)
                    print("total rewards : %d " % total_reward)
                    self.writer.add_scalar('Rewards/test', total_reward, num_epi)
                    break
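# Usage sketch for Testor (assumption-based): the checkpoint path and worker index
# below are placeholders, and ENV_NAME / DQN / converter / make_19action are expected
# to be defined at module level, as the class above assumes.
def run_evaluation(checkpoint_path="checkpoints/learner_latest.pt", idx=0):
    state_dict = torch.load(checkpoint_path)   # hypothetical checkpoint saved by the learner
    testor = Testor(model_dict=state_dict, idx=idx)
    testor.explore()                           # runs self.max_epi greedy episodes, logs to TensorBoard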
class DQNAgent:
    def __init__(self, config: Config):
        self.config = config
        self.is_training = True
        self.buffer = ReplayBuffer(self.config.max_buff)
        self.model = DQN(self.config.state_dim, self.config.action_dim).cuda()
        self.model_optim = Adam(self.model.parameters(), lr=self.config.learning_rate)
        if self.config.use_cuda:
            self.cuda()

    def act(self, state, epsilon=None):
        if epsilon is None:
            epsilon = self.config.epsilon_min
        # Epsilon-greedy: act greedily with probability 1 - epsilon (always greedy at test time).
        if random.random() > epsilon or not self.is_training:
            state = torch.tensor(state, dtype=torch.float).unsqueeze(0)
            if self.config.use_cuda:
                state = state.cuda()
            q_value = self.model.forward(state)
            action = q_value.max(1)[1].item()
        else:
            action = random.randrange(self.config.action_dim)
        return action

    def learning(self, fr):
        s0, a, r, s1, done = self.buffer.sample(self.config.batch_size)

        s0 = torch.tensor(s0, dtype=torch.float)
        s1 = torch.tensor(s1, dtype=torch.float)
        a = torch.tensor(a, dtype=torch.long)
        r = torch.tensor(r, dtype=torch.float)
        done = torch.tensor(done, dtype=torch.float)

        if self.config.use_cuda:
            s0 = s0.cuda()
            s1 = s1.cuda()
            a = a.cuda()
            r = r.cuda()
            done = done.cuda()

        q_values = self.model(s0).cuda()
        next_q_values = self.model(s1).cuda()
        next_q_value = next_q_values.max(1)[0]

        q_value = q_values.gather(1, a.unsqueeze(1)).squeeze(1)
        expected_q_value = r + self.config.gamma * next_q_value * (1 - done)
        # Detach the target so gradients do not flow through expected_q_value.
        loss = (q_value - expected_q_value.detach()).pow(2).mean()

        self.model_optim.zero_grad()
        loss.backward()
        self.model_optim.step()

        return loss.item()

    def cuda(self):
        self.model.cuda()

    def load_weights(self, model_path):
        if model_path is None:
            return
        self.model.load_state_dict(torch.load(model_path))

    def save_model(self, output, tag=''):
        torch.save(self.model.state_dict(), '%s/model_%s.pkl' % (output, tag))

    def save_config(self, output):
        with open(output + '/config.txt', 'w') as f:
            attr_val = get_class_attr_val(self.config)
            for k, v in attr_val.items():
                f.write(str(k) + " = " + str(v) + "\n")
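# Minimal training-loop sketch for DQNAgent (an assumption, not code from the agent
# itself): `env` is any Gym-style environment matching config.state_dim,
# `epsilon_by_frame` is a hypothetical exploration schedule, and the ReplayBuffer is
# assumed to expose add() and size(); adjust to the project's real APIs.
def train(agent: DQNAgent, env, config: Config, epsilon_by_frame):
    state = env.reset()
    for fr in range(config.frames):                      # config.frames assumed
        action = agent.act(state, epsilon_by_frame(fr))
        next_state, reward, done, _ = env.step(action)
        agent.buffer.add(state, action, reward, next_state, done)
        state = next_state if not done else env.reset()
        if agent.buffer.size() > config.batch_size:
            agent.learning(fr)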
# saving training variables
outliers = []
centroids = []
G = []
episode_rewards = []
mean_reward_episodes_list = []
best_reward_episodes_list = []
episode_rewards_list = []

for t in range(MAX_FRAMES):
    # Stack the current 4-frame observation with the goal channel: (1, 5, 84, 84).
    x = np.concatenate((s, g), axis=0).reshape((1, 5, 84, 84))
    if t < LEARNING_STARTS:
        a = env.action_space.sample()
    else:
        qt = Qt.forward(torch.Tensor(x).type(dtype) / 255)
        a = epsilon_greedy(qt.cpu().detach().numpy(), epsilon=epsilon)  # epsilon-greedy action

    SP, r, terminal, step_info = step(a)
    episode_rewards.append(r)
    sp = four_frames_to_4_84_84(SP)
    xp = np.concatenate((sp, g), axis=0).reshape((1, 5, 84, 84))
    man_mask = get_man_mask(SP)
    man_loc = get_man_xy_np_coordinate(man_mask)
    # intrinsic_done_task = are_masks_align(man_mask, subgoal_mask)
    intrinsic_done_task = is_man_inside_subgoal_mask(man_mask, subgoal_mask)

    # outlier
    if r > 0:
        print('Outlier detected at', man_loc)
        outliers.append(man_loc)
        R += r
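# The loop above calls an `epsilon_greedy` helper that is not shown here; a minimal
# sketch of what it could look like (an assumption, the project's real helper may
# differ): take a random action with probability epsilon, otherwise the greedy argmax.
def epsilon_greedy(q_values, epsilon=0.1):
    # q_values: numpy array of shape (1, num_actions) or (num_actions,)
    if np.random.rand() < epsilon:
        return np.random.randint(q_values.shape[-1])   # explore
    return int(np.argmax(q_values))                    # exploit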
class DDQNAgent:
    def __init__(self, config: Config, training=True):
        self.config = config
        self.is_training = training
        self.buffer = ReplayBuffer(self.config.max_buff)

        self.model = DQN(self.config.state_shape, self.config.action_dim)
        self.target_model = DQN(self.config.state_shape, self.config.action_dim)
        self.target_model.load_state_dict(self.model.state_dict())
        self.optim = Adam(self.model.parameters(), lr=self.config.learning_rate)

        self.model.cuda()
        self.target_model.cuda()

    def act(self, state, epsilon=None):
        if epsilon is None:
            epsilon = self.config.epsilon_min
        if random.random() > epsilon or not self.is_training:
            state = torch.tensor(state, dtype=torch.float).unsqueeze(0)
            state = state.cuda()
            q_value = self.model.forward(state)
            action = q_value.max(1)[1].item()
        else:
            action = random.randrange(self.config.action_dim)
        return action

    def learn(self, t):
        s, a, r, s2, done = self.buffer.sample(self.config.batch_size)

        s = torch.tensor(s, dtype=torch.float)
        a = torch.tensor(a, dtype=torch.long)
        r = torch.tensor(r, dtype=torch.float)
        s2 = torch.tensor(s2, dtype=torch.float)
        done = torch.tensor(done, dtype=torch.float)

        s = s.cuda()
        a = a.cuda()
        r = r.cuda()
        s2 = s2.cuda()
        done = done.cuda()

        q_values = self.model(s).cuda()
        next_q_values = self.model(s2).cuda()
        next_q_state_values = self.target_model(s2).cuda()

        q_value = q_values.gather(1, a.unsqueeze(1)).squeeze(1)
        # Double DQN: the online network selects the next action, the target network evaluates it.
        next_q_value = next_q_state_values.gather(
            1, next_q_values.max(1)[1].unsqueeze(1)).squeeze(1)
        expected_q_value = r + self.config.gamma * next_q_value * (1 - done)

        loss = (q_value - expected_q_value.detach()).pow(2).mean()

        self.optim.zero_grad()
        loss.backward()
        self.optim.step()

        if t % self.config.update_interval == 0:
            self.target_model.load_state_dict(self.model.state_dict())

        return loss.item()

    def load_weights(self, model_path):
        model = torch.load(model_path)
        if 'model' in model:
            self.model.load_state_dict(model['model'])
        else:
            self.model.load_state_dict(model)

    def save_checkpoint(self):
        os.makedirs('ckpt', exist_ok=True)
        torch.save(self.model.state_dict(), 'ckpt/model.pt')

    def load_checkpoint(self):
        # load_state_dict expects a state dict, not a path, so load the file first.
        state_dict = torch.load('ckpt/model.pt')
        self.model.load_state_dict(state_dict)
        self.target_model.load_state_dict(state_dict)
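# For reference, the two bootstrap targets side by side (illustrative sketch; the
# arguments mirror the tensors already built in DDQNAgent.learn, nothing new is assumed):
def dqn_vs_double_dqn_targets(model, target_model, s2, r, done, gamma):
    # Vanilla DQN: the target network both selects and evaluates the next action,
    # which tends to overestimate Q-values.
    dqn_target = r + gamma * target_model(s2).max(1)[0] * (1 - done)
    # Double DQN: the online network selects the action, the target network evaluates it.
    best_action = model(s2).max(1)[1]
    ddqn_target = r + gamma * target_model(s2).gather(1, best_action.unsqueeze(1)).squeeze(1) * (1 - done)
    return dqn_target, ddqn_target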
class Algorithm():
    def __init__(self, lr, gamma, act_dim, state_dim, memory_capacity,
                 epsilon, batch_size):
        self.model = DQN(state_dim, act_dim)
        self.state_dim = state_dim
        self.act_dim = act_dim
        self.lr = lr
        self.gamma = gamma
        self.epsilon = epsilon  # probability of acting greedily (not of exploring)
        self.target_model = copy.deepcopy(self.model)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        self.loss = nn.MSELoss()
        self.memory_capacity = memory_capacity
        # Each row stores: state | action | reward | next_state | done.
        self.replay_buffer = np.zeros((memory_capacity, 2 * state_dim + 3))
        self.memory_counter = 0
        self.batch_size = batch_size

    def sync_target(self):
        self.target_model.load_state_dict(self.model.state_dict())

    def predict(self, obs):
        return self.model.forward(obs)

    def choose_action(self, state):
        state = torch.unsqueeze(torch.Tensor(state), 0)
        if np.random.rand() <= self.epsilon:
            # Greedy branch: epsilon is the greedy probability here.
            action_value = self.model.forward(state)
            action = torch.max(action_value, dim=1)[1].numpy()[0]
        else:
            action = np.random.randint(0, self.act_dim)
        return action

    def store_transition(self, state, action, reward, next_state, done):
        transition = np.hstack((state, [action, reward], next_state, done))
        index = self.memory_counter % self.memory_capacity
        self.replay_buffer[index, :] = transition
        self.memory_counter += 1

    def learn(self):
        sample_index = np.random.choice(self.memory_capacity, self.batch_size)
        batch_memory = self.replay_buffer[sample_index, :]
        batch_state = torch.FloatTensor(batch_memory[:, :self.state_dim])
        batch_action = torch.LongTensor(
            batch_memory[:, self.state_dim:self.state_dim + 1].astype(int))
        batch_reward = torch.FloatTensor(
            batch_memory[:, self.state_dim + 1:self.state_dim + 2])
        batch_next_state = torch.FloatTensor(
            batch_memory[:, self.state_dim + 2:2 * self.state_dim + 2])
        batch_done = torch.FloatTensor(batch_memory[:, -1:])

        next_value = self.target_model.forward(batch_next_state)
        # Detach the bootstrapped value so no gradient flows into the target network.
        max_value = torch.max(next_value, dim=1)[0].detach()
        target = batch_reward.squeeze() + self.gamma * (
            1 - batch_done).squeeze() * max_value
        q_value = self.model.forward(batch_state)
        behavior = torch.gather(q_value, dim=1, index=batch_action).squeeze()

        self.optimizer.zero_grad()
        output = self.loss(behavior, target)
        output.backward()
        self.optimizer.step()
        return output
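# Usage sketch for Algorithm (assumptions: a Gym-style `env` with a flat observation
# of length state_dim; the hyperparameters and the sync interval below are illustrative
# values, not ones taken from the class):
def run_training(env, episodes=200):
    algo = Algorithm(lr=1e-3, gamma=0.99, act_dim=env.action_space.n,
                     state_dim=env.observation_space.shape[0],
                     memory_capacity=10000, epsilon=0.9, batch_size=32)
    for ep in range(episodes):
        state = env.reset()
        done = False
        while not done:
            action = algo.choose_action(state)
            next_state, reward, done, _ = env.step(action)
            algo.store_transition(state, action, reward, next_state, float(done))
            state = next_state
            if algo.memory_counter > algo.memory_capacity:   # learn only once the buffer has filled
                algo.learn()
        if ep % 10 == 0:
            algo.sync_target()   # periodically copy online weights into the target network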