import os

import numpy as np
import tensorflow as tf
from tensorflow.contrib import layers

# Project-internal helpers (Agent, Experience, Memory, OrnsteinUhlenbeckActionNoise,
# batch_norm_layer, linear_fan_in, fan_in_initializer, conv2d, linear) are assumed
# to be importable from elsewhere in the repository.


def _save_experience(experience: Experience, memory: Memory,
                     next_state: np.ndarray) -> None:
    """
    Save an experience to memory after filling in its next state.

    :param experience: experience from the action that was taken
    :param memory: the agent's memory holding all saved experiences
    :param next_state: the state the agent ended up in
    """
    experience = experience._replace(next_state=next_state)
    memory.add(experience)
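# Usage sketch (hedged): `Experience` is assumed to be a namedtuple with a
# `next_state` field (the `_replace` call above relies on that); the other
# field names below are illustrative, not taken from the project.
#
#   experience = Experience(state=state, action=action, reward=reward,
#                           next_state=None, terminal=done)
#   _save_experience(experience, memory, new_state)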
class DDPG(Agent):
    def __init__(self, env, monitor_path: str, **usercfg) -> None:
        super(DDPG, self).__init__(**usercfg)
        self.env = env
        self.monitor_path: str = monitor_path
        self.config.update(
            n_episodes=100000,
            n_timesteps=env.spec.tags.get(
                "wrapper_config.TimeLimit.max_episode_steps"),
            actor_learning_rate=1e-4,
            critic_learning_rate=1e-3,
            ou_theta=0.15,
            ou_sigma=0.2,
            gamma=0.99,
            batch_size=64,
            tau=0.001,
            l2_loss_coef=1e-2,
            n_actor_layers=2,
            n_hidden_units=64,
            actor_layer_norm=True,
            critic_layer_norm=False,  # Batch norm for the critic does not seem to work
            replay_buffer_size=1e6,
            replay_start_size=10000  # Required number of replay buffer entries to start training
        )
        self.config.update(usercfg)

        self.state_shape: list = list(env.observation_space.shape)
        self.n_actions: int = env.action_space.shape[0]
        self.states = tf.placeholder(tf.float32, [None] + self.state_shape,
                                     name="states")
        self.actions_taken = tf.placeholder(tf.float32,
                                            [None, self.n_actions],
                                            name="actions_taken")
        self.critic_target = tf.placeholder(tf.float32, [None, 1],
                                            name="critic_target")
        self.is_training = tf.placeholder(tf.bool, name="is_training")

        with tf.variable_scope("actor"):
            self.action_output, self.actor_vars = self.build_actor_network()

        self.target_action_output, actor_target_update = \
            self.build_target_actor_network(self.actor_vars)

        self.q_gradient_input = tf.placeholder("float",
                                               [None, self.n_actions],
                                               name="q_grad_input")
        self.actor_policy_gradients = tf.gradients(self.action_output,
                                                   self.actor_vars,
                                                   -self.q_gradient_input,
                                                   name="actor_gradients")
        self.actor_train_op = tf.train.AdamOptimizer(
            self.config["actor_learning_rate"],
            name="actor_optimizer").apply_gradients(
                list(zip(self.actor_policy_gradients, self.actor_vars)))

        with tf.variable_scope("critic"):
            self.q_value_output, self.critic_vars = self.build_critic_network()

        self.target_q_value_output, critic_target_update = \
            self.build_target_critic_network(self.critic_vars)

        l2_loss = tf.add_n([
            self.config["l2_loss_coef"] * tf.nn.l2_loss(var)
            for var in self.critic_vars
        ])
        self.critic_loss = tf.reduce_mean(
            tf.square(self.critic_target - self.q_value_output)) + l2_loss
        self.critic_train_op = tf.train.AdamOptimizer(
            self.config["critic_learning_rate"],
            name="critic_optimizer").minimize(self.critic_loss)
        self.action_gradients = tf.gradients(self.q_value_output,
                                             self.actions_taken,
                                             name="action_gradients")

        summaries = []
        for v in self.actor_vars + self.critic_vars:
            summaries.append(tf.summary.histogram(v.name, v))
        self.model_summary_op = tf.summary.merge(summaries)

        self.update_targets_op = tf.group(actor_target_update,
                                          critic_target_update,
                                          name="update_targets")
        self.init_op = tf.global_variables_initializer()

        self.action_noise = OrnsteinUhlenbeckActionNoise(
            self.n_actions, self.config["ou_sigma"], self.config["ou_theta"])
        self.replay_buffer = Memory(int(self.config["replay_buffer_size"]))
        self.n_updates = 0

        self.summary_writer = tf.summary.FileWriter(
            os.path.join(self.monitor_path, "summaries"),
            tf.get_default_graph())

    def build_actor_network(self):
        layer1_size = 400
        layer2_size = 300

        x = self.states
        if self.config["actor_layer_norm"]:
            x = batch_norm_layer(x,
                                 training_phase=self.is_training,
                                 scope_bn="batch_norm_0",
                                 activation=tf.identity)
        with tf.variable_scope("L1"):
            x, l1_vars = linear_fan_in(x, layer1_size)
            if self.config["actor_layer_norm"]:
                x = batch_norm_layer(x,
                                     training_phase=self.is_training,
                                     scope_bn="batch_norm_1",
                                     activation=tf.nn.relu)
        with tf.variable_scope("L2"):
            x, l2_vars = linear_fan_in(x, layer2_size)
            if self.config["actor_layer_norm"]:
                x = batch_norm_layer(x,
                                     training_phase=self.is_training,
                                     scope_bn="batch_norm_2",
                                     activation=tf.nn.relu)
        with tf.variable_scope("L3"):
            W3 = tf.Variable(tf.random_uniform(
                [layer2_size, self.n_actions], -3e-3, 3e-3),
                             name="w")
            b3 = tf.Variable(tf.random_uniform([self.n_actions], -3e-3, 3e-3),
                             name="b")
            action_output = tf.tanh(tf.nn.xw_plus_b(x, W3, b3))
            l3_vars = [W3, b3]
        return action_output, l1_vars + l2_vars + l3_vars

    def build_target_actor_network(self, actor_vars: list):
        ema = tf.train.ExponentialMovingAverage(decay=1 - self.config["tau"])
        target_update = ema.apply(actor_vars)
        target_net = [ema.average(v) for v in actor_vars]

        x = self.states
        if self.config["actor_layer_norm"]:
            x = batch_norm_layer(x,
                                 training_phase=self.is_training,
                                 scope_bn="target_batch_norm_0",
                                 activation=tf.identity)
        x = tf.nn.xw_plus_b(x, target_net[0], target_net[1])
        if self.config["actor_layer_norm"]:
            x = batch_norm_layer(x,
                                 training_phase=self.is_training,
                                 scope_bn="target_batch_norm_1",
                                 activation=tf.nn.relu)
        x = tf.nn.xw_plus_b(x, target_net[2], target_net[3])
        if self.config["actor_layer_norm"]:
            x = batch_norm_layer(x,
                                 training_phase=self.is_training,
                                 scope_bn="target_batch_norm_2",
                                 activation=tf.nn.relu)
        action_output = tf.tanh(
            tf.nn.xw_plus_b(x, target_net[4], target_net[5]))
        return action_output, target_update

    def build_critic_network(self):
        layer1_size = 400
        layer2_size = 300

        x = self.states
        with tf.variable_scope("L1"):
            if self.config["critic_layer_norm"]:  # Defaults to False (= don't use it)
                x = batch_norm_layer(x,
                                     training_phase=self.is_training,
                                     scope_bn="batch_norm_0",
                                     activation=tf.identity)
            x, l1_vars = linear_fan_in(x, layer1_size)
            x = tf.nn.relu(x)
        with tf.variable_scope("L2"):
            W2 = tf.get_variable(
                "w", [layer1_size, layer2_size],
                initializer=fan_in_initializer(layer1_size + self.n_actions))
            W2_action = tf.get_variable(
                "w_action", [self.n_actions, layer2_size],
                initializer=fan_in_initializer(layer1_size + self.n_actions))
            b2 = tf.get_variable(
                "b", [layer2_size],
                initializer=fan_in_initializer(layer1_size + self.n_actions))
            x = tf.nn.relu(
                tf.matmul(x, W2) + tf.matmul(self.actions_taken, W2_action) +
                b2)
        with tf.variable_scope("L3"):
            W3 = tf.Variable(tf.random_uniform([layer2_size, 1], -3e-3, 3e-3),
                             name="w")
            b3 = tf.Variable(tf.random_uniform([1], -3e-3, 3e-3), name="b")
            q_value_output = tf.nn.xw_plus_b(x, W3, b3, name="q_value")
        return q_value_output, l1_vars + [W2, W2_action, b2, W3, b3]

    def build_target_critic_network(self, critic_vars: list):
        ema = tf.train.ExponentialMovingAverage(decay=1 - self.config["tau"])
        target_update = ema.apply(critic_vars)
        target_net = [ema.average(v) for v in critic_vars]

        x = self.states
        if self.config["critic_layer_norm"]:
            x = batch_norm_layer(x,
                                 training_phase=self.is_training,
                                 scope_bn="batch_norm_0",
                                 activation=tf.identity)
        x = tf.nn.relu(tf.nn.xw_plus_b(x, target_net[0], target_net[1]))
        x = tf.nn.relu(
            tf.matmul(x, target_net[2]) +
            tf.matmul(self.actions_taken, target_net[3]) + target_net[4])
        q_value_output = tf.nn.xw_plus_b(x, target_net[5], target_net[6])
        return q_value_output, target_update

    def actor_gradients(self, state_batch: np.ndarray,
                        action_batch: np.ndarray):
        q, grads = tf.get_default_session().run(
            [self.q_value_output, self.action_gradients],
            feed_dict={
                self.states: state_batch,
                self.actions_taken: action_batch,
                self.is_training: False
            })
        summary = tf.Summary()
        summary.value.add(tag="model/actor_loss",
                          simple_value=float(-np.mean(q)))
        self.summary_writer.add_summary(summary, self.n_updates)
        return grads[0]
    def target_q(self, states: np.ndarray, actions: np.ndarray):
        return tf.get_default_session().run(self.target_q_value_output,
                                            feed_dict={
                                                self.states: states,
                                                self.actions_taken: actions,
                                                self.is_training: False
                                            })

    def q_value(self, states: np.ndarray, actions: np.ndarray):
        return tf.get_default_session().run(self.q_value_output,
                                            feed_dict={
                                                self.states: states,
                                                self.actions_taken: actions,
                                                self.is_training: False
                                            })

    def actions(self, states: np.ndarray) -> np.ndarray:
        """Get the actions for a batch of states."""
        return tf.get_default_session().run(self.action_output,
                                            feed_dict={
                                                self.states: states,
                                                self.is_training: True
                                            })

    def action(self, state: np.ndarray) -> np.ndarray:
        """Get the action for a single state."""
        return tf.get_default_session().run(self.action_output,
                                            feed_dict={
                                                self.states: [state],
                                                self.is_training: False
                                            })[0]

    def target_actions(self, states: np.ndarray) -> np.ndarray:
        """Get the actions for a batch of states using the target actor network."""
        return tf.get_default_session().run(self.target_action_output,
                                            feed_dict={
                                                self.states: states,
                                                self.is_training: True
                                            })

    def train(self):
        sample = self.replay_buffer.get_batch(self.config["batch_size"])

        # Reshape the sampled actions (needed when n_actions == 1)
        action_batch = np.resize(sample["actions"],
                                 [self.config["batch_size"], self.n_actions])

        # Calculate critic targets
        next_action_batch = self.target_actions(sample["states1"])
        q_value_batch = self.target_q(sample["states1"], next_action_batch)
        critic_targets = sample["rewards"] + (1 - sample["terminals1"]) * \
            self.config["gamma"] * q_value_batch.squeeze()
        critic_targets = np.resize(
            critic_targets, [self.config["batch_size"], 1]).astype(np.float32)

        # Update critic weights
        fetches = [self.q_value_output, self.critic_loss, self.critic_train_op]
        predicted_q, critic_loss, _ = tf.get_default_session().run(
            fetches,
            feed_dict={
                self.critic_target: critic_targets,
                self.states: sample["states0"],
                self.actions_taken: action_batch,
                self.is_training: True
            })
        summary = tf.Summary()
        summary.value.add(tag="model/critic_loss",
                          simple_value=float(critic_loss))
        summary.value.add(tag="model/predicted_q_mean",
                          simple_value=np.mean(predicted_q))
        summary.value.add(tag="model/predicted_q_std",
                          simple_value=np.std(predicted_q))
        self.summary_writer.add_summary(summary, self.n_updates)

        # Update the actor using the sampled gradient:
        action_batch_for_gradients = self.actions(sample["states0"])
        q_gradient_batch = self.actor_gradients(sample["states0"],
                                                action_batch_for_gradients)
        tf.get_default_session().run(
            self.actor_train_op,
            feed_dict={
                self.q_gradient_input: q_gradient_batch,
                self.states: sample["states0"],
                self.is_training: True
            })

        # Update the target networks
        tf.get_default_session().run(
            [self.update_targets_op, self.model_summary_op])
        self.n_updates += 1

    def noise_action(self, state: np.ndarray):
        """Choose an action based on the actor and exploration noise."""
        action = self.action(state)
        return action + self.action_noise()

    def learn(self):
        max_action = self.env.action_space.high
        with tf.Session() as sess, sess.as_default():
            sess.run(self.init_op)
            for episode in range(self.config["n_episodes"]):
                state = self.env.reset()
                episode_reward = 0
                episode_length = 0
                for _ in range(self.config["n_timesteps"]):
                    action = self.noise_action(state)
                    new_state, reward, done, _ = self.env.step(action *
                                                               max_action)
                    episode_length += 1
                    episode_reward += reward
                    self.replay_buffer.add(state, action, reward, new_state,
                                           done)
                    if self.replay_buffer.n_entries > self.config[
                            "replay_start_size"]:
                        self.train()
                    state = new_state
                    if done:
                        self.action_noise.reset()
                        summary = tf.Summary()
summary.value.add(tag="global/Episode_length", simple_value=float(episode_length)) summary.value.add(tag="global/Reward", simple_value=float(episode_reward)) self.summary_writer.add_summary(summary, episode) self.summary_writer.flush() break
class MbPA:
    def __init__(self, sess, args):
        with tf.variable_scope(args.model_name):
            self.args = args
            self.learning_rate = args.learning_rate
            self.session = sess
            self.x = tf.placeholder(tf.float32, shape=[None, 784], name="x")
            self.y = tf.placeholder(tf.float32, shape=[None, 10], name="y")
            # self.trainable = tf.placeholder(tf.int32, shape=(), name="trainable")
            self.memory_sample_batch = tf.placeholder(
                tf.int16, shape=(), name="memory_sample_batch")

            self.embed = self.embedding(self.x)
            self.M = Memory(args.memory_size,
                            self.embed.get_shape()[-1],
                            self.y.get_shape()[-1])

            embs_and_values = tf.py_func(self.get_memory_sample,
                                         [self.memory_sample_batch],
                                         [tf.float64, tf.float64])
            self.memory_batch_x = tf.to_float(embs_and_values[0])
            self.memory_batch_y = tf.to_float(embs_and_values[1])
            self.xa = tf.concat(values=[self.embed, self.memory_batch_x],
                                axis=0)
            self.ya = tf.concat(values=[self.y, self.memory_batch_y], axis=0)

            self.y_ = self.output_network(self.xa)

            self.cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=self.ya,
                                                        logits=self.y_))
            self.optim = tf.train.GradientDescentOptimizer(
                self.learning_rate).minimize(self.cross_entropy)
            self.correct_prediction = tf.equal(tf.argmax(self.ya, 1),
                                               tf.argmax(self.y_, 1))
            self.accuracy = tf.reduce_mean(
                tf.cast(self.correct_prediction, tf.float32))

            self.session.run(tf.global_variables_initializer())

    def train(self, xs, ys, memory_sample_batch):
        embeds, _ = self.session.run(
            [self.embed, self.optim],
            feed_dict={
                self.x: xs,
                self.y: ys,
                self.memory_sample_batch: memory_sample_batch
            })
        return embeds

    def test(self, xs_test, ys_test):
        acc = self.session.run(self.accuracy,
                               feed_dict={
                                   self.x: xs_test,
                                   self.y: ys_test,
                                   self.memory_sample_batch: 0
                               })
        return acc

    def get_memory_sample(self, batch_size):
        x, y = self.M.sample(batch_size)
        return x, y

    def add_to_memory(self, xs, ys):
        if self.args.sample_add == "normal":
            self.M.add(xs, ys)
        elif self.args.sample_add == "lru":
            self.M.add_lru(xs, ys)
        elif self.args.sample_add == "rand":
            self.M.add_rand(xs, ys)
        elif self.args.sample_add == "knn":
            self.M.add_knn(xs, ys)
        elif self.args.sample_add == "knn_lru":
            self.M.add_knn_lru(xs, ys)
        else:
            raise ValueError(
                "Unknown sample_add type, please choose one of "
                "['normal', 'lru', 'rand', 'knn', 'knn_lru']")

    @staticmethod
    def embedding(x):
        out = tf.reshape(x, [-1, 28, 28, 1])
        # convs = [(16, 8, 4), (32, 4, 2)]
        # with tf.variable_scope("conv1"):
        #     out = layers.convolution2d(inputs=out,
        #                                num_outputs=16,
        #                                kernel_size=8,
        #                                stride=4,
        #                                trainable=trainable)
        #     out = tf.nn.relu(out)
        #     out = tf.nn.max_pool(out, ksize=[1, 2, 3, 1],
        #                          strides=[1, 2, 2, 1], padding="SAME")
        with tf.variable_scope("conv2"):
            # out = layers.convolution2d(inputs=out,
            #                            num_outputs=32,
            #                            kernel_size=4,
            #                            stride=2,
            #                            trainable=trainable)
            # out = tf.nn.relu(out)
            # out = tf.nn.max_pool(out, ksize=[1, 2, 3, 1],
            #                      strides=[1, 2, 2, 1], padding="SAME")
            embed = layers.flatten(out)
        return embed

    @staticmethod
    def output_network(embed):
        out = embed
        with tf.variable_scope("fc_1"):
            out = layers.fully_connected(inputs=out, num_outputs=1024)
            out = tf.nn.relu(out)
        with tf.variable_scope("fc_2"):
            out = layers.fully_connected(inputs=out, num_outputs=10)
        return out
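# Usage sketch for MbPA (hedged): `Memory` and the `args` namespace come from
# elsewhere in the project, and the field values below are illustrative only.
#
#   args = argparse.Namespace(model_name="mbpa", learning_rate=0.01,
#                             memory_size=10000, sample_add="normal")
#   sess = tf.Session()
#   model = MbPA(sess, args)
#   embeds = model.train(batch_xs, batch_ys, memory_sample_batch=32)
#   model.add_to_memory(embeds, batch_ys)
#   print(model.test(test_xs, test_ys))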
class MbPA_KNN_Test:
    def __init__(self, sess, args):
        self.args = args
        self.session = sess
        self.w = {}
        self.eval_w = {}
        with tf.variable_scope(self.args.model_name):
            self.x = tf.placeholder(tf.float32, shape=[None, 784], name="x")
            self.y = tf.placeholder(tf.float32, shape=[None, 10], name="y")
            self.memory_sample_batch = tf.placeholder(
                tf.int16, shape=(), name="memory_sample_batch")

            with tf.variable_scope("training"):
                with tf.variable_scope("embedding"):
                    self.out = tf.reshape(self.x, [-1, 28, 28, 1])
                    with tf.variable_scope("conv"):
                        # self.out, self.w["l1_w"], self.w["l1_b"] = conv2d(
                        #     x=self.out,
                        #     output_dim=16,
                        #     kernel_size=[8, 8],
                        #     stride=[4, 4],
                        #     activation_fn=tf.nn.relu,
                        #     name="conv1")
                        # self.out, self.w["l2_w"], self.w["l2_b"] = conv2d(
                        #     x=self.out,
                        #     output_dim=32,
                        #     kernel_size=[4, 4],
                        #     stride=[2, 2],
                        #     activation_fn=tf.nn.relu,
                        #     name="conv2")
                        self.embed = layers.flatten(self.out)
                        # self.embed_dim = self.embed.get_shape()[-1]

                self.M = Memory(self.args.memory_size,
                                self.x.get_shape()[-1],
                                self.y.get_shape()[-1])
                embs_and_values = tf.py_func(self.get_memory_sample,
                                             [self.memory_sample_batch],
                                             [tf.float64, tf.float64])
                self.memory_batch_x = tf.to_float(embs_and_values[0])
                self.memory_batch_y = tf.to_float(embs_and_values[1])
                self.xa = tf.concat(values=[self.x, self.memory_batch_x],
                                    axis=0)
                self.ya = tf.concat(values=[self.y, self.memory_batch_y],
                                    axis=0)

                with tf.variable_scope("fc"):
                    self.out = self.xa
                    # self.out, self.w["l3_w"], self.w["l3_b"] = linear(
                    #     input_=self.out,
                    #     output_size=1024,
                    #     activation_fn=tf.nn.relu,
                    #     name="fc_1")
                    self.out, self.w["l4_w"], self.w["l4_b"] = linear(
                        input_=self.out, output_size=10, name="fc_2")
                    self.ya_ = self.out

                self.cross_entropy = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(labels=self.ya,
                                                            logits=self.ya_))
                self.optim = tf.train.GradientDescentOptimizer(
                    self.args.learning_rate).minimize(self.cross_entropy)
                self.correct_prediction = tf.equal(tf.argmax(self.ya, 1),
                                                   tf.argmax(self.ya_, 1))
                self.accuracy = tf.reduce_mean(
                    tf.cast(self.correct_prediction, tf.float32))

        self.session.run(tf.global_variables_initializer())

    def update_training_to_prediction(self):
        # Note: `self.eval_w` is never populated and `self.t_w_assign_op` /
        # `self.t_w_input` are never defined, so this loop never executes;
        # as written the method is a dead no-op (its only call site in
        # `test` is commented out).
        for name in self.eval_w.keys():
            self.t_w_assign_op[name].eval(
                {self.t_w_input[name]: self.w[name].eval()})

    def train(self, xs, ys, memory_sample_batch):
        embeds, _ = self.session.run(
            [self.embed, self.optim],
            feed_dict={
                self.x: xs,
                self.y: ys,
                self.memory_sample_batch: memory_sample_batch
            })
        return embeds

    def get_memory_sample(self, batch_size):
        xs, ys = self.M.sample(batch_size)
        return xs, ys

    def add_to_memory(self, xs, ys):
        if self.args.sample_add == "normal":
            self.M.add(xs, ys)
        elif self.args.sample_add == "lru":
            self.M.add_lru(xs, ys)
        elif self.args.sample_add == "rand":
            self.M.add_rand(xs, ys)
        elif self.args.sample_add == "knn":
            self.M.add_knn(xs, ys)
        elif self.args.sample_add == "knn_lru":
            self.M.add_knn_lru(xs, ys)
        else:
            raise ValueError(
                "Unknown sample_add type, please choose one of "
                "['normal', 'lru', 'rand', 'knn', 'knn_lru']")

    def test(self, xs_test, ys_test):
        # self.update_training_to_prediction()
        acc = self.session.run(self.accuracy,
                               feed_dict={
                                   self.x: xs_test,
                                   self.y: ys_test,
                                   self.memory_sample_batch: 0
                               })
        return acc

    @property
    def memory_length(self):
        return self.M.length
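# The episodic `Memory` used by MbPA and MbPA_KNN_Test is defined elsewhere in
# the project. Below is a minimal, hedged sketch of the interface both classes
# rely on (constructor `Memory(size, key_dim, value_dim)`, `sample`, `add`,
# `length`); the LRU/kNN variants (`add_lru`, `add_rand`, `add_knn`,
# `add_knn_lru`) are omitted, and this FIFO ring buffer is illustrative, not
# the project's actual implementation.
class MemorySketch:
    def __init__(self, size: int, key_dim: int, value_dim: int):
        self.size = size
        self.keys = np.zeros((size, key_dim), dtype=np.float64)
        self.values = np.zeros((size, value_dim), dtype=np.float64)
        self.length = 0   # number of filled slots
        self._cursor = 0  # next slot to overwrite

    def add(self, xs: np.ndarray, ys: np.ndarray) -> None:
        # Insert a batch of (embedding, label) pairs, overwriting the oldest
        # entries once the buffer is full.
        for x, y in zip(xs, ys):
            self.keys[self._cursor] = x
            self.values[self._cursor] = y
            self._cursor = (self._cursor + 1) % self.size
            self.length = min(self.length + 1, self.size)

    def sample(self, batch_size: int):
        # Uniformly sample stored pairs; returns empty float64 arrays when
        # asked for 0 items (as in `test`, which feeds memory_sample_batch=0).
        n = int(batch_size)
        if n == 0 or self.length == 0:
            return (np.empty((0, self.keys.shape[1])),
                    np.empty((0, self.values.shape[1])))
        idx = np.random.randint(0, self.length, size=n)
        return self.keys[idx], self.values[idx]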