Exemplo n.º 1
0
 def OnReset(self, event):
     """Handle a reset request: restore toolbar state and reset the server.

     Enables the start and ``enable_blue`` tools, disables the reset tool,
     publishes a ``'Reset'`` message via ``pub`` and finally calls
     ``server.reset()``.

     ``event`` is accepted to satisfy the handler signature; it is not read.
     """
     bar = self.GetToolBar()
     # (attribute holding the tool, desired enabled state), applied in order.
     desired_states = (
         ("start_btn", True),
         ("reset_btn", False),
         ("enable_blue", True),
     )
     for attr_name, enabled in desired_states:
         bar.EnableTool(getattr(self, attr_name).GetId(), enabled)
     pub.sendMessage('Reset')
     server.reset()
    def run(self):
        """Run episodes until the shared episode counter reaches ``MAX_EP``.

        Each episode starts from ``reset()``, whose result is converted with
        ``feature_vec``. At every step an action is chosen by the local
        network, applied with ``step`` and the transition is buffered. The
        buffers are handed to ``push_and_pull`` every ``UPDATE_GLOBAL_ITER``
        steps and whenever the episode ends; a finished episode is also
        reported through ``record``.

        After the outer loop exits, ``None`` is put on ``self.res_queue``
        (presumably a sentinel telling the consumer this worker is finished;
        confirm against the reader of the queue).
        """
        step_counter = 1
        while self.g_ep.value < MAX_EP:
            state = reset()
            print("img_array", state)
            state = feature_vec(state)
            print("feat", state)

            states, actions, rewards = [], [], []
            episode_return = 0.
            while True:
                action = self.lnet.choose_action(state)
                next_state, reward, done = step(action)
                for label, value in (("a", action), ("s_", next_state),
                                     ("r", reward), ("done", done)):
                    print(label, value)

                # A terminal step overrides whatever reward step() returned.
                if done:
                    reward = -1
                episode_return += reward
                actions.append(action)
                states.append(state)
                rewards.append(reward)

                # Sync with the global net on schedule or at episode end.
                sync_due = step_counter % UPDATE_GLOBAL_ITER == 0
                if sync_due or done:
                    push_and_pull(self.opt, self.lnet, self.gnet, done,
                                  next_state, states, actions, rewards, GAMMA)
                    states, actions, rewards = [], [], []

                if done:
                    # Report the finished episode; the step counter is
                    # intentionally not advanced for the terminal step.
                    record(self.g_ep, self.g_ep_r, episode_return,
                           self.res_queue, self.name)
                    break

                state = next_state
                step_counter += 1
        self.res_queue.put(None)
Exemplo n.º 3
0
 def setUp(self):
     """Reset the module-level ``server`` object.

     Presumably invoked by the test framework before each test; confirm
     against the enclosing test class.
     """
     # Only reading the module-level name, so no ``global`` declaration
     # is required.
     server.reset()
Exemplo n.º 4
0
 def setUp(self):
     """Reset the module-level ``server`` object.

     Presumably invoked by the test framework before each test; confirm
     against the enclosing test class.
     """
     # Only reading the module-level name, so no ``global`` declaration
     # is required.
     server.reset()
Exemplo n.º 5
0
                                                        NUM_MINIBATCH)
                            break

                observation_stats = np.reshape(
                    observation_buffer, (-1, input_normalizer.num_inputs))
                input_normalizer.update(
                    sess,
                    mean=observation_stats.mean(axis=0, keepdims=True),
                    var=observation_stats.var(axis=0, keepdims=True),
                    count=observation_stats.shape[0])
                if not stats_ready:
                    progress.set_postfix_str(
                        "Initial statistics gathered, commencing training session"
                    )
                    sess.run([sync_op, set_stats_ready_op])
                    utility.apply(lambda server: server.reset(), servers)
                else:
                    progress.set_postfix_str("Update %d completed" %
                                             update_idx)

                progress.close()
                rollout_queue, stats_ready, progress = queue.Queue(
                ), True, None
                break

    if step_count > MAX_UPDATE_STEPS:
        print("Completed training process, terminating session")
        utility.apply(lambda server: server.stop(), servers)
        rollout_queue = None

utility.apply(lambda thread: thread.join(), server_threads)