Пример #1
0
 def test_gru(self, trained_gru, gru_net_path, cuda):
     """Load saved GRU weights and measure the network on ``self.env``.

     The state dict read from ``gru_net_path`` is loaded into
     ``trained_gru``, ``eval()`` is called on it and its ``noise``
     attribute is set to ``False``. ``gru_nn.test`` is then run for 20
     episodes with logging and rendering enabled.

     :param trained_gru: network object that receives the saved weights
     :param gru_net_path: location passed to ``torch.load``
     :param cuda: forwarded unchanged to ``gru_nn.test``
     :return: the value produced by ``gru_nn.test`` (logged as the
         average performance)
     """
     logging.info('Testing GRU!')

     # Restore the stored weights before switching the net to test mode.
     saved_weights = torch.load(gru_net_path)
     trained_gru.load_state_dict(saved_weights)
     trained_gru.eval()
     trained_gru.noise = False

     episode_count = 20
     score = gru_nn.test(trained_gru, self.env, episode_count,
                         log=True, cuda=cuda, render=True)

     summary = 'Average Performance:{}'.format(score)
     logging.info(summary)
     return score
Пример #2
0
 def bgru_train(self, bgru_net, gru_net, cuda, gru_scratch,
                trajectories_data_path, bgru_net_path, bgru_plot_dir,
                batch_size, train_epochs, gru_prob_data_path, bgru_dir):
     """Train the binary GRU network and write a readme for the result.

     A 10-episode ``gru_nn.test`` run of ``gru_net`` first sets
     ``self.env.spec.reward_threshold``. Training then takes one of two
     paths depending on ``gru_scratch``:

     * truthy: Adam with ``lr=1e-3``, trajectories generated with
       ``trajectories_data_path``, training through ``gru_nn.train``
       using ``batch_size``;
     * falsy: Adam with ``lr=1e-4``, trajectories generated with
       ``gru_prob_data_path`` and a deep-copied ``bgru_net.gru_net`` on
       which ``.cpu()`` was called, training through ``bgru_nn.train``
       with a fixed batch size of 5, ``test_episodes=1`` and
       ``trunc_k=100``.

     Finally ``tl.write_net_readme`` is called with the elapsed time in
     seconds (rounded to 4 decimals) under the ``'time_taken'`` key.

     :param bgru_net: network being trained
     :param gru_net: network evaluated to obtain the reward threshold
     :param cuda: forwarded to the test and train helpers
     :param gru_scratch: selects which training path is taken
     :param trajectories_data_path: data path used on the scratch path
     :param bgru_net_path: forwarded to the train helper
     :param bgru_plot_dir: forwarded to the train helper
     :param batch_size: batch size used on the scratch path only
     :param train_epochs: forwarded to the train helper
     :param gru_prob_data_path: data path used on the non-scratch path
     :param bgru_dir: directory passed to ``tl.write_net_readme``
     :return: ``None``
     """
     # The measured result of gru_net becomes the env's reward threshold.
     baseline = gru_nn.test(gru_net, self.env, 10,
                            log=True, cuda=cuda, render=True)
     self.env.spec.reward_threshold = baseline

     logging.info('Training Binary GRUNet!')
     bgru_net.train()
     started_at = time.time()

     if not gru_scratch:
         optimizer = optim.Adam(bgru_net.parameters(), lr=1e-4)
         # Trajectory generation receives a separate copy of the inner
         # net after .cpu() has been called on it.
         inner_copy = copy.deepcopy(bgru_net.gru_net).cpu()
         train_data = tl.generate_trajectories(self.env, 3, 5,
                                               gru_prob_data_path,
                                               inner_copy)
         bgru_net = bgru_nn.train(bgru_net, self.env, optimizer,
                                  bgru_net_path, bgru_plot_dir,
                                  train_data, 5, train_epochs, cuda,
                                  test_episodes=1, trunc_k=100)
     else:
         optimizer = optim.Adam(bgru_net.parameters(), lr=1e-3)
         train_data = tl.generate_trajectories(self.env, 3, 5,
                                               trajectories_data_path)
         bgru_net = gru_nn.train(bgru_net, self.env, optimizer,
                                 bgru_net_path, bgru_plot_dir,
                                 train_data, batch_size, train_epochs,
                                 cuda)

     elapsed = round(time.time() - started_at, 4)
     tl.write_net_readme(bgru_net, bgru_dir, info={'time_taken': elapsed})
Пример #3
0
            gru_net = GRUNet(len(obs), args.gru_size, int(env.action_space.n))
            gru_net.eval()
            bhx_net = HxQBNet(args.gru_size, args.bhx_size)
            if args.cuda:
                gru_net = gru_net.cuda()
                bhx_net = bhx_net.cuda()

            if not os.path.exists(gru_net_path):
                logging.info('Pre-Trained GRU model not found!')
                sys.exit(0)
            else:
                gru_net.load_state_dict(torch.load(gru_net_path))
            gru_net.noise = False
            env.spec.reward_threshold = gru_nn.test(gru_net,
                                                    env,
                                                    5,
                                                    log=True,
                                                    cuda=args.cuda,
                                                    render=False)
            logging.info('Reward Threshold:' + str(env.spec.reward_threshold))
            target_net = lambda bottle_net: MMNet(gru_net, hx_qbn=bottle_net)

            logging.info('Loading Data-Set')
            hx_train_data, hx_test_data, _, _ = tl.generate_bottleneck_data(
                gru_net,
                env,
                args.bn_episodes,
                bottleneck_data_path,
                cuda=args.cuda,
                max_steps=args.generate_max_steps)
            if args.bhx_train:
                fsm_object.bhx_train(bhx_net, hx_train_data, hx_test_data,