def test_gru(self, trained_gru, gru_net_path, cuda):
    """Evaluate a saved GRU network on ``self.env`` and return the result.

    The state dict stored at ``gru_net_path`` is loaded into ``trained_gru``,
    the network is switched to evaluation mode with its ``noise`` attribute
    turned off, and ``gru_nn.test`` is run for 20 episodes with logging and
    rendering enabled.

    Args:
        trained_gru: Network object receiving the saved state dict. It is
            modified in place (weights, eval mode, ``noise`` attribute).
        gru_net_path: Path handed to ``torch.load`` to read the state dict.
        cuda: Forwarded unchanged to ``gru_nn.test`` as its ``cuda`` keyword.

    Returns:
        Whatever ``gru_nn.test`` returns; it is logged under the label
        'Average Performance'.
    """
    logging.info('Testing GRU!')

    # Restore the saved parameters before putting the network in eval mode.
    saved_state = torch.load(gru_net_path)
    trained_gru.load_state_dict(saved_state)
    trained_gru.eval()
    trained_gru.noise = False

    episode_count = 20
    perf = gru_nn.test(
        trained_gru,
        self.env,
        episode_count,
        log=True,
        cuda=cuda,
        render=True,
    )
    logging.info('Average Performance:{}'.format(perf))
    return perf
def bgru_train(self, bgru_net, gru_net, cuda, gru_scratch, trajectories_data_path,
               bgru_net_path, bgru_plot_dir, batch_size, train_epochs,
               gru_prob_data_path, bgru_dir):
    """Train the binary GRU network and write a readme with the time taken.

    ``self.env.spec.reward_threshold`` is first overwritten with the result
    of running ``gru_nn.test`` on ``gru_net`` for 10 episodes (logging and
    rendering enabled). ``bgru_net`` is then trained along one of two paths
    selected by ``gru_scratch``, and ``tl.write_net_readme`` records the
    elapsed wall-clock time.

    Args:
        bgru_net: Network to train; put into train mode here.
        gru_net: Network evaluated to obtain the reward threshold.
        cuda: Forwarded to ``gru_nn.test`` and to the training routine.
        gru_scratch: Truthy selects the from-scratch path (``gru_nn.train``,
            Adam lr=1e-3); falsy selects the other path (``bgru_nn.train``,
            Adam lr=1e-4, trajectories generated with a CPU deep copy of
            ``bgru_net.gru_net``).
        trajectories_data_path: Passed to ``tl.generate_trajectories`` on the
            from-scratch path.
        bgru_net_path: Forwarded to the training routine.
        bgru_plot_dir: Forwarded to the training routine.
        batch_size: Forwarded to ``gru_nn.train``; only read on the
            from-scratch path.
        train_epochs: Forwarded to the training routine.
        gru_prob_data_path: Passed to ``tl.generate_trajectories`` on the
            non-scratch path.
        bgru_dir: Directory argument given to ``tl.write_net_readme``.

    Returns:
        None.
    """
    self.env.spec.reward_threshold = gru_nn.test(
        gru_net, self.env, 10, log=True, cuda=cuda, render=True)
    logging.info('Training Binary GRUNet!')
    bgru_net.train()
    started_at = time.time()

    if gru_scratch:
        adam = optim.Adam(bgru_net.parameters(), lr=1e-3)
        trajectories = tl.generate_trajectories(
            self.env, 3, 5, trajectories_data_path)
        bgru_net = gru_nn.train(
            bgru_net, self.env, adam, bgru_net_path, bgru_plot_dir,
            trajectories, batch_size, train_epochs, cuda)
    else:
        adam = optim.Adam(bgru_net.parameters(), lr=1e-4)
        # Trajectories are generated with a detached CPU copy of the wrapped
        # GRU so the network being trained is not touched by generation.
        cpu_gru_copy = copy.deepcopy(bgru_net.gru_net).cpu()
        trajectories = tl.generate_trajectories(
            self.env, 3, 5, gru_prob_data_path, cpu_gru_copy)
        # NOTE(review): the literal 5 sits where the scratch path passes
        # ``batch_size``; presumably a fixed batch size -- confirm against
        # ``bgru_nn.train``.
        bgru_net = bgru_nn.train(
            bgru_net, self.env, adam, bgru_net_path, bgru_plot_dir,
            trajectories, 5, train_epochs, cuda,
            test_episodes=1, trunc_k=100)

    elapsed = round(time.time() - started_at, 4)
    tl.write_net_readme(bgru_net, bgru_dir, info={'time_taken': elapsed})
gru_net = GRUNet(len(obs), args.gru_size, int(env.action_space.n)) gru_net.eval() bhx_net = HxQBNet(args.gru_size, args.bhx_size) if args.cuda: gru_net = gru_net.cuda() bhx_net = bhx_net.cuda() if not os.path.exists(gru_net_path): logging.info('Pre-Trained GRU model not found!') sys.exit(0) else: gru_net.load_state_dict(torch.load(gru_net_path)) gru_net.noise = False env.spec.reward_threshold = gru_nn.test(gru_net, env, 5, log=True, cuda=args.cuda, render=False) logging.info('Reward Threshold:' + str(env.spec.reward_threshold)) target_net = lambda bottle_net: MMNet(gru_net, hx_qbn=bottle_net) logging.info('Loading Data-Set') hx_train_data, hx_test_data, _, _ = tl.generate_bottleneck_data( gru_net, env, args.bn_episodes, bottleneck_data_path, cuda=args.cuda, max_steps=args.generate_max_steps) if args.bhx_train: fsm_object.bhx_train(bhx_net, hx_train_data, hx_test_data,