class Christophers_Agent():
    def __init__(self, task):
        # Task (environment) information
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high
        self.action_range = self.action_high - self.action_low

        # Weights for a simple linear policy (state_space x action_space),
        # scaled so that initial actions fall in a reasonable range.
        self.w = np.random.normal(
            size=(self.state_size, self.action_size),
            scale=(self.action_range / (2 * self.state_size)))

        self.actor = Actor(self.state_size, self.action_size, self.action_low,
                           self.action_high)
        self.critic = Critic(self.state_size, self.action_size)

        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic.model.get_weights())
        self.actor_target.model.set_weights(self.actor.model.get_weights())

        self.gamma = 0.95
        self.tau = 0.001

        self.best_w = None
        self.best_score = -np.inf

        self.exploration_mu = 0.5
        self.exploration_theta = 0.2
        self.exploration_sigma = 0.4
        self.noise = Noise(self.action_size, self.exploration_mu,
                           self.exploration_theta, self.exploration_sigma)

        self.buffer_size = 100000
        self.batch_size = 32
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        self.num_steps = 0

        # Episode variables
        self.reset_episode()

    def reset_episode(self):
        # Track the best average reward per step seen so far
        score = self.get_score()
        if score > self.best_score:
            self.best_score = score
        self.total_reward = 0.0
        self.num_steps = 0
        self.noise.reset()
        state = self.task.reset()
        self.last_state = state
        return state

    def step(self, action, reward, next_state, done):
        self.total_reward += reward
        self.num_steps += 1

        self.memory.add(self.last_state, action, reward, next_state, done)

        if len(self.memory) > self.batch_size:
            experiences = self.memory.sample()
            self.learn(experiences)

        self.last_state = next_state

    def act(self, state):
        state = np.reshape(state, [-1, self.state_size])
        action = self.actor.model.predict(state)[0]
        # Add some noise for exploration
        return list(action + self.noise.sample())

    def get_score(self):
        return -np.inf if self.num_steps == 0 else self.total_reward / self.num_steps

    def learn(self, experiences):
        # Unpack the batch of experience tuples
        states = np.vstack([e.state for e in experiences if e is not None])
        actions = np.array([e.action for e in experiences if e is not None]).astype(
            np.float32).reshape(-1, self.action_size)
        rewards = np.array([e.reward for e in experiences if e is not None]).astype(
            np.float32).reshape(-1, 1)
        dones = np.array([e.done for e in experiences if e is not None]).astype(
            np.uint8).reshape(-1, 1)
        next_states = np.vstack([e.next_state for e in experiences if e is not None])

        # Predicted next-state actions and Q-values from the target models
        actions_next = self.actor_target.model.predict_on_batch(next_states)
        Q_targets_next = self.critic_target.model.predict_on_batch(
            [next_states, actions_next])

        # Compute Q-targets for the current states
        Q_targets = rewards + self.gamma * Q_targets_next * (1 - dones)

        # Train the local critic on the computed Q-targets
        self.critic.model.train_on_batch(x=[states, actions], y=Q_targets)

        # Train the local actor using the critic's action gradients
        action_gradients = np.reshape(
            self.critic.get_action_gradients([states, actions, 0]),
            (-1, self.action_size))
        self.actor.train_fn([states, action_gradients, 1])

        # Soft-update the target models
        self.soft_update(self.critic.model, self.critic_target.model)
        self.soft_update(self.actor.model, self.actor_target.model)

    def soft_update(self, local_model, target_model):
        # Soft update: theta_target = tau * theta_local + (1 - tau) * theta_target
        local_weights = np.array(local_model.get_weights())
        target_weights = np.array(target_model.get_weights())

        assert len(local_weights) == len(target_weights)

        new_weights = self.tau * local_weights + (1 - self.tau) * target_weights
        target_model.set_weights(new_weights)
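For reference, a minimal episode loop that could drive an agent like the one above is sketched below. It assumes a Udacity-style task object whose step(action) returns (next_state, reward, done); that object is not part of this snippet, so treat this as an illustrative sketch only.

# Illustrative sketch only: `task` is assumed to expose state_size, action_size,
# action_low/high, reset(), and step(action) -> (next_state, reward, done).
agent = Christophers_Agent(task)
num_episodes = 500

for i_episode in range(1, num_episodes + 1):
    state = agent.reset_episode()      # resets the task, the noise process, and counters
    done = False
    while not done:
        action = agent.act(state)      # policy output plus exploration noise
        next_state, reward, done = task.step(action)
        agent.step(action, reward, next_state, done)
        state = next_state
    print("Episode {:4d} | avg reward per step: {:.3f}".format(
        i_episode, agent.get_score()))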
Example #2
num_steps = env.episode_len
batch_size = 64
std = 0.1
#agent = Agent(state_dim, action_dim, hidden_dim=64, tau=0.001)
noise = Noise(action_dim, mean=0., std=std)
#replay = ReplayMemory(memory_size)

gamma = torch.tensor([0.99], requires_grad=True)
rewards = []
times = []
agent = load_agent(file='pretrained/model_3.0.pth.tar', gamma=gamma)
for episode in range(20):
    state = torch.Tensor([env.reset()])
    episode_reward = 0.
    #std *= 0.9985
    noise.reset(0., std)
    for t in range(num_steps):
        action = agent.select_action(state, noise)
        next_state, reward, done, _ = env.step(action.cpu().numpy()[0])
        episode_reward += reward
        #action = torch.Tensor(action)
        mask = torch.Tensor([not done])
        next_state = torch.Tensor([next_state])
        reward = torch.Tensor([reward])
        agent.memory.push(state, action, mask, next_state, reward)
        state = next_state
        if len(agent.memory) > batch_size * 2:
            print("True")
            agent.learn(epochs=2, batch_size=batch_size)
        if done:
            #env.goal_radius -= 2e-4
            break
    rewards.append(episode_reward)
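The loop above assumes a Noise helper constructed as Noise(action_dim, mean, std) and re-seeded each episode via reset(mean, std); its internals are not shown, and how agent.select_action consumes it is project-specific. A minimal Gaussian version matching that interface might look like the following sketch (an assumption, not the original implementation).

import numpy as np

class Noise:
    """ Minimal Gaussian exploration noise (illustrative sketch). """
    def __init__(self, action_dim, mean=0., std=0.1):
        self.action_dim = action_dim
        self.reset(mean, std)

    def reset(self, mean, std):
        # Allows the caller to anneal the noise scale between episodes
        self.mean = mean
        self.std = std

    def sample(self):
        # One noise value per action dimension
        return np.random.normal(self.mean, self.std, self.action_dim)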
Example #3
class Agent():
    """ Class implementation of a so-called "intelligent" agent.
        This agent interacts with and learns from the environment.
        This agent employs the DDPG algorithm to solve this problem.
    """

    # Class-level Actor properties:
    # actor_local = None
    # actor_target = None
    # actor_optimizer = None

    # Class-level Critic properties:
    # critic_local = None
    # critic_target = None
    # critic_optimizer = None

    # Class-level memory variable:
    # memory = None

    def __init__(self, state_size, action_size, seed, add_noise=True):
        """ Initialize an Agent instance.
        
        Params
        ======
            state_size (int): Dimension of each state
            action_size (int): Dimension of each action
            seed (int): Random seed
            add_noise (bool): Toggle for using the stochastic process
        """

        # Set the parameters.
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Setting the Actor network (with the Target Network).
        self.actor_local = Actor(state_size, action_size, seed).to(device)
        self.actor_target = Actor(state_size, action_size, seed).to(device)

        # Optimize the Actor using Adam.
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Setting the Critic network (with the Target Network).
        self.critic_local = Critic(state_size, action_size, seed).to(device)
        self.critic_target = Critic(state_size, action_size, seed).to(device)

        # Optimize the Critic using Adam.
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)

        # Set up the noise process (one row of noise per parallel agent),
        # or disable it entirely.
        self.noise = Noise((20, action_size), seed) if add_noise else None

        # Use the Replay memory buffer (once per class).
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed,
                                   device)

        # Initialize the time step (until max NUM_TIME_STEPS is reached).
        # self.t_step = 0

    def step(self, time_step, states, actions, rewards, next_states, dones):
        """ Update the network on each step.
            In other words, save the experience in replay memory,
            and then use random sampling from the buffer to learn.
        """

        # Save experience in replay memory.
        for state, action, reward, next_state, done in zip(
                states, actions, rewards, next_states, dones):
            self.memory.add(state, action, reward, next_state, done)

        # Learn every time step till NUM_TIME_STEPS is reached.
        # if time_step % NUM_TIME_STEPS != 0:
        #     return

        # Save the experience in replay memory, then use random sampling from the buffer to learn.
        self.sample_and_learn()

    def sample_and_learn(self):
        """ For a specified number of agents,
            use random sampling from the buffer to learn.
        """

        # If enough samples are available in memory, get random subset and learn.
        if len(self.memory) > BATCH_SIZE:
            experiences = self.memory.sample()
            self.learn(experiences, GAMMA)

            # for _ in range(NUM_LEARN_UPDATES):
            #     experiences = Agent.memory.sample()
            #     self.learn(experiences, GAMMA)

    def act(self, state, add_noise=True):
        """ Return the actions for a given state as per current policy.
        
        Params
        ======
            state (array_like): Current state
            add_noise (bool): Toggle for using the stochastic process
        """

        state = torch.from_numpy(state).float().to(device)

        self.actor_local.eval()
        with torch.no_grad():
            action = self.actor_local(state).cpu().data.numpy()
        self.actor_local.train()

        # If the stochastic process is enabled, add exploration noise.
        if add_noise and self.noise is not None:
            action += self.noise.sample()

        # Return the action.
        return np.clip(action, -1, 1)

    def reset(self):
        """ Reset the internal state (noise) to the mean (mu).
        """

        if self.noise is not None:
            self.noise.reset()

    def learn(self, experiences, gamma):
        """ Update value parameters using given batch of experience tuples.
            i.e.,
            Q_targets = r + γ * critic_target(next_state, actor_target(next_state))
            where
                actor_target(state) -> action, and
                critic_target(state, action) -> Q-value.
        
        Params
        ======
            experiences (Tuple[torch.Tensor]): Tuple of (s, a, r, s', done) tuples
            gamma (float): Discount factor
        """

        # Set the parameters.
        states, actions, rewards, next_states, dones = experiences
        """ Update the Critic.
        """
        # Get the predicted next-state actions and Q-values from the target models.
        # Calculate the pair action/reward for each of the next_states.
        actions_next = self.actor_target(next_states)
        Q_targets_next = self.critic_target(next_states, actions_next)

        # Compute Q-targets for the current states, (y_i).
        Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))

        # Compute the Critic loss.
        Q_expected = self.critic_local(states, actions)
        critic_loss = F.mse_loss(Q_expected, Q_targets)

        # Minimize the loss.
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1)
        self.critic_optimizer.step()
        """ Update the Actor.
        """
        # Compute the Actor loss.
        actions_pred = self.actor_local(states)
        actor_loss = -self.critic_local(states, actions_pred).mean()

        # Minimize the loss.
        self.actor_optimizer.zero_grad()
        # torch.nn.utils.clip_grad_norm_(self.actor_local.parameters(), 1)
        actor_loss.backward()
        self.actor_optimizer.step()
        """ Update the target networks.
        """
        self.soft_update(self.critic_local, self.critic_target, TAU)
        self.soft_update(self.actor_local, self.actor_target, TAU)

    def soft_update(self, local_model, target_model, tau):
        """ Soft update model parameters.
            i.e.,
            θ_target = τ * θ_local + (1 - τ) * θ_target.

        Params
        ======
            local_model (PyTorch model): Weights will be copied from
            target_model (PyTorch model): Weights will be copied to
            tau (float): Interpolation parameter 
        """

        for target_param, local_param in zip(target_model.parameters(),
                                             local_model.parameters()):
            target_param.data.copy_(tau * local_param.data +
                                    (1. - tau) * target_param.data)
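The Agent class above references module-level constants (BUFFER_SIZE, BATCH_SIZE, GAMMA, TAU, LR_ACTOR, LR_CRITIC, WEIGHT_DECAY) and a device object that are defined elsewhere and not shown here. The block below is a plausible sketch using typical DDPG settings, not the original configuration.

import torch

# Illustrative hyperparameters only; the original values are not shown in this snippet.
BUFFER_SIZE = int(1e5)   # replay buffer size
BATCH_SIZE = 128         # minibatch size
GAMMA = 0.99             # discount factor
TAU = 1e-3               # soft-update interpolation factor
LR_ACTOR = 1e-4          # actor learning rate
LR_CRITIC = 1e-3         # critic learning rate
WEIGHT_DECAY = 0         # L2 weight decay for the critic optimizer

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")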
class PolicySearch_Agent():
    def __init__(self, task):
        # Task (environment) information
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (policy) and Critic (value) models, plus their target copies
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # Exploration noise process
        self.mu = 0
        self.theta = 0.2
        self.sigma = 0.005  # random noise
        self.noise = Noise(self.action_size, self.mu, self.theta, self.sigma)

        # Algorithm parameters
        self.gamma = 0.9  # discount factor
        self.tau = 0.1    # soft-update rate
        self.best_score = -np.inf
        self.score = 0

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    def reset_episode(self):
        self.noise.reset()
        state = self.task.reset()
        self.last_state = state
        self.score = 0
        return state

    def step(self, action, reward, next_state, done):
        # Save the experience, then learn from a random sample once enough is stored
        self.memory.add(self.last_state, action, reward, next_state, done)
        if len(self.memory) > self.batch_size:
            experiences = self.memory.sample()
            self.learn(experiences)
        self.last_state = next_state

        # Track the best episode score
        self.score += reward
        if done and self.score > self.best_score:
            self.best_score = self.score

    def act(self, state):
        # Return the action for the given state under the current policy, plus exploration noise
        state = np.reshape(state, [-1, self.state_size])
        action = self.actor_local.model.predict(state)[0]
        return list(action + self.noise.sample())

    def learn(self, experiences):
        # Unpack the batch of experience tuples
        states = np.vstack([e.state for e in experiences if e is not None])
        actions = np.array([e.action for e in experiences if e is not None]).astype(
            np.float32).reshape(-1, self.action_size)
        rewards = np.array([e.reward for e in experiences if e is not None]).astype(
            np.float32).reshape(-1, 1)
        dones = np.array([e.done for e in experiences if e is not None]).astype(
            np.uint8).reshape(-1, 1)
        next_states = np.vstack([e.next_state for e in experiences if e is not None])

        # Predicted next-state actions and Q-values from the target models
        actions_next = self.actor_target.model.predict_on_batch(next_states)
        Q_values_next = self.critic_target.model.predict_on_batch(
            [next_states, actions_next])

        # Compute Q-targets for the current states and train the local critic
        Q_values = rewards + self.gamma * Q_values_next * (1 - dones)
        self.critic_local.model.train_on_batch(x=[states, actions], y=Q_values)

        # Train the local actor using the critic's action gradients
        action_gradients = np.reshape(
            self.critic_local.get_action_gradients([states, actions, 0]),
            (-1, self.action_size))
        self.actor_local.train_fn([states, action_gradients, 1])

        # Soft-update the target models
        self.update(self.critic_local.model, self.critic_target.model)
        self.update(self.actor_local.model, self.actor_target.model)

    def update(self, local_model, target_model):
        # Soft update: theta_target = tau * theta_local + (1 - tau) * theta_target
        local_weights = np.array(local_model.get_weights())
        target_weights = np.array(target_model.get_weights())
        assert len(local_weights) == len(target_weights)

        new_weights = self.tau * local_weights + (1 - self.tau) * target_weights
        target_model.set_weights(new_weights)
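Both Keras-based agents above depend on Noise and ReplayBuffer helpers that are not included in this listing. The sketch below matches the interfaces they call (an Ornstein-Uhlenbeck process with reset() and sample(), and a fixed-size buffer with add(), sample(), and __len__()); the internals are assumptions based on common DDPG implementations, not the original code.

import random
from collections import deque, namedtuple

import numpy as np


class Noise:
    """ Ornstein-Uhlenbeck exploration noise (illustrative sketch). """
    def __init__(self, size, mu, theta, sigma):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        # Reset the internal state to the mean
        self.state = np.copy(self.mu)

    def sample(self):
        # Update the internal state and return it as a noise sample
        dx = self.theta * (self.mu - self.state) + \
            self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state


class ReplayBuffer:
    """ Fixed-size buffer of experience tuples (illustrative sketch). """
    Experience = namedtuple(
        "Experience", ["state", "action", "reward", "next_state", "done"])

    def __init__(self, buffer_size, batch_size):
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size

    def add(self, state, action, reward, next_state, done):
        self.memory.append(self.Experience(state, action, reward, next_state, done))

    def sample(self):
        return random.sample(self.memory, k=self.batch_size)

    def __len__(self):
        return len(self.memory)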