Example #1
def create_model(loaded):
    '''
    Create Autoencoder from data that has previously been saved

    Parameters:
        loaded   A model that has been loaded from a file

    Returns:
        newly created Autoencoder
    '''
    old_args = loaded['args_dict']
    enl, dnl = AutoEncoder.get_non_linearity(old_args['nonlinearity'])
    product = AutoEncoder(encoder_sizes=old_args['encoder'],
                          encoding_dimension=old_args['dimension'],
                          encoder_non_linearity=enl,
                          decoder_non_linearity=dnl,
                          decoder_sizes=old_args['decoder'])
    product.load_state_dict(loaded['model_state_dict'])
    return product
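
A minimal usage sketch for create_model, assuming the checkpoint was produced by torch.save on a dict holding args_dict and model_state_dict; the file name and map_location below are illustrative:

import torch

# Hypothetical checkpoint file; any dict saved with the keys used above will work.
loaded = torch.load('autoencoder.pt', map_location='cpu')
model = create_model(loaded)
model.eval()  # switch to inference mode before encoding/decoding data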
Example #2
# Imports assumed by this fragment; AutoEncoder and normalize_vec are defined
# elsewhere in the same project.
from copy import deepcopy

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable


class DDQNAgent:

    def __init__(self, env, network, buffer, epsilon=0.05, batch_size=32):

        self.ae = AutoEncoder(25)
        self.ae.load_state_dict(torch.load('lunar_models/code25.pt', map_location=torch.device('cpu')))
        self.env = env
        self.network = network
        self.target_network = deepcopy(network)
        self.buffer = buffer
        self.epsilon = epsilon
        self.batch_size = batch_size
        self.window = 100
        self.reward_threshold = 195  # Avg reward before CartPole is "solved"
        self.initialize()

    def take_step(self, mode='train'):

        # Encode the current observation with the pretrained autoencoder and
        # use the latent code as the state fed to the Q-network.
        norm_s0 = normalize_vec(self.s_0)
        boh = torch.from_numpy(norm_s0).float()
        ae_out, ae_code = self.ae(Variable(boh.to('cpu')), 100, 100, 50)
        new_s = ae_code.detach().to('cpu').numpy()

        if mode == 'explore':
            action = self.env.action_space.sample()
        else:
            action = self.network.get_action(new_s, epsilon=self.epsilon)
            self.step_count += 1
        s_1, r, done, _ = self.env.step(action)
        self.rewards += r
        norm_s1 = normalize_vec(s_1)
        ae_out, ae_code = self.ae(Variable(torch.from_numpy(norm_s1).float().to('cpu')), 100, 100, 50)
        new_s1 = ae_code.detach().to('cpu').numpy()

        self.buffer.append(new_s, action, r, done, new_s1)
        self.s_0 = s_1.copy()
        if done:
            self.s_0 = self.env.reset()
        return done

    # Implement DQN training algorithm
    def train(self, gamma=0.99, max_episodes=10000,
              batch_size=32,
              network_update_frequency=4,
              network_sync_frequency=2000):
        self.gamma = gamma
        # Populate replay buffer
        while self.buffer.burn_in_capacity() < 1:
            self.take_step(mode='explore')

        ep = 0
        training = True
        while training:
            self.s_0 = self.env.reset()
            self.rewards = 0
            done = False
            while not done:
                if ep % 50 == 0:
                    self.env.render()
                done = self.take_step(mode='train')
                # Update network
                if self.step_count % network_update_frequency == 0:
                    self.update()
                # Sync networks
                if self.step_count % network_sync_frequency == 0:
                    self.target_network.load_state_dict(
                        self.network.state_dict())
                    self.sync_eps.append(ep)

                if done:
                    ep += 1
                    self.training_rewards.append(self.rewards)
                    self.training_loss.append(np.mean(self.update_loss))
                    self.update_loss = []
                    mean_rewards = np.mean(
                        self.training_rewards[-self.window:])
                    self.mean_training_rewards.append(mean_rewards)
                    print("\rEpisode {:d} Mean Rewards {:.2f}\t\t".format(
                        ep, mean_rewards), end="")

                    if ep >= max_episodes:
                        training = False
                        print('\nEpisode limit reached.')
                        break
                    if mean_rewards >= self.reward_threshold:
                        training = False
                        print('\nEnvironment solved in {} episodes!'.format(
                            ep))
                        break

    def calculate_loss(self, batch):
        states, actions, rewards, dones, next_states = batch
        rewards_t = torch.FloatTensor(rewards).to(device=self.network.device).reshape(-1, 1)
        actions_t = torch.LongTensor(np.array(actions)).reshape(-1, 1).to(
            device=self.network.device)
        # Boolean mask of terminal transitions (indexing with uint8 masks is deprecated)
        dones_t = torch.BoolTensor(dones).to(device=self.network.device)

        qvals = torch.gather(self.network.get_qvals(states), 1, actions_t)

        #################################################################
        # DDQN update: the online network picks the greedy next action and the
        # target network evaluates it, which reduces Q-value overestimation.
        next_actions = torch.max(self.network.get_qvals(next_states), dim=-1)[1]
        next_actions_t = next_actions.reshape(-1, 1).to(device=self.network.device)
        target_qvals = self.target_network.get_qvals(next_states)
        qvals_next = torch.gather(target_qvals, 1, next_actions_t).detach()
        #################################################################
        qvals_next[dones_t] = 0  # Zero-out terminal states
        expected_qvals = self.gamma * qvals_next + rewards_t
        loss = nn.MSELoss()(qvals, expected_qvals)
        return loss

    def update(self):
        self.network.optimizer.zero_grad()
        batch = self.buffer.sample_batch(batch_size=self.batch_size)
        loss = self.calculate_loss(batch)
        loss.backward()
        self.network.optimizer.step()
        # .cpu() is a no-op on CPU tensors, so a single branch handles both devices
        self.update_loss.append(loss.detach().cpu().numpy())

    def initialize(self):
        self.training_rewards = []
        self.training_loss = []
        self.update_loss = []
        self.mean_training_rewards = []
        self.sync_eps = []
        self.rewards = 0
        self.step_count = 0
        self.s_0 = self.env.reset()
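
A hedged wiring sketch for DDQNAgent, assuming a Gym LunarLander-style environment plus project-specific Q-network and replay-buffer classes exposing the methods used above (get_action, get_qvals, optimizer, device, burn_in_capacity, append, sample_batch); the class names and constructor arguments here are illustrative, not part of the original code:

import gym

env = gym.make('LunarLander-v2')
network = QNetwork(n_inputs=25, n_outputs=env.action_space.n)      # hypothetical class
buffer = ExperienceReplayBuffer(memory_size=50000, burn_in=10000)  # hypothetical class

agent = DDQNAgent(env, network, buffer, epsilon=0.05, batch_size=32)
agent.train(gamma=0.99, max_episodes=10000)

The Q-network consumes the 25-dimensional autoencoder code rather than the raw observation, which is why n_inputs matches AutoEncoder(25) in the agent's constructor.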
Example #3
    print(DEVICE)
    train = get_dataset()
    if False:  # flip to True to preview a few random images from the dataset
        print("Showing random images from dataset")
        showRandomImaged(train)

    model = AutoEncoder().cuda() if torch.cuda.is_available() else AutoEncoder()
    if __DEBUG__:
        print(model)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), weight_decay=1e-5)

    if LOAD:
        model.load_state_dict(torch.load(PATH))

    for epoch in range(EPOCHS):
        for i, (images, _) in enumerate(train):
            images = images.to(DEVICE)
            out = model(images)
            loss = criterion(out, images)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            ## LOG
            print(f"epoch {epoch}/{EPOCHS}\nLoss : {loss.item()}")

            if __DEBUG__:
                if i % 10 == 0: